From 41bb787fb3e05aa0071d9b5105285c9d2f01e692 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel"
+ --enable-jit-sealloc
+
+which enables the use of an execmem allocator in JIT that is compatible with
+SELinux. This has no effect if JIT is not enabled. See the
pcre2jit
documentation for a discussion of JIT usage. When JIT support is enabled,
pcre2grep automatically makes use of it, unless you add
@@ -516,7 +521,7 @@ contains a single function called LLVMFuzzerTestOneInput() whose arguments are
a pointer to a string and the length of the string. When called, this function
tries to compile the string as a pattern, and if that succeeds, to match it.
This is done both with no options and with some random options bits that are
-generated from the string.
+generated from the string.
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck @@ -529,13 +534,13 @@ file are the test string.
-In versions of PCRE2 prior to 10.30, there were two ways of handling -backtracking in the pcre2_match() function. The default was to use the +In versions of PCRE2 prior to 10.30, there were two ways of handling +backtracking in the pcre2_match() function. The default was to use the system stack, but if
--disable-stack-for-recursion-was set, memory on the heap was used. From release 10.30 onwards this has +was set, memory on the heap was used. From release 10.30 onwards this has changed (the stack is no longer used) and this option now does nothing except give a warning. @@ -554,7 +559,7 @@ Cambridge, England.
-Last updated: 30 May 2017
+Last updated: 17 June 2017
Copyright © 1997-2017 University of Cambridge.
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html
index 3eccb3e..a582316 100644
--- a/doc/html/pcre2pattern.html
+++ b/doc/html/pcre2pattern.html
@@ -755,6 +755,7 @@ Those that are not part of an identified script are lumped together as
"Common". The current list of scripts is:
+Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, @@ -765,6 +766,7 @@ Bamum, Bassa_Vah, Batak, Bengali, +Bhaiksuki, Bopomofo, Brahmi, Braille, @@ -826,6 +828,8 @@ Mahajani, Malayalam, Mandaic, Manichaean, +Marchen, +Masaram_Gondi, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, @@ -838,7 +842,9 @@ Multani, Myanmar, Nabataean, New_Tai_Lue, +Newa, Nko, +Nushu, Ogham, Ol_Chiki, Old_Hungarian, @@ -849,6 +855,7 @@ Old_Persian, Old_South_Arabian, Old_Turkic, Oriya, +Osage, Osmanya, Pahawh_Hmong, Palmyrene, @@ -866,6 +873,7 @@ Siddham, SignWriting, Sinhala, Sora_Sompeng, +Soyombo, Sundanese, Syloti_Nagri, Syriac, @@ -876,6 +884,7 @@ Tai_Tham, Tai_Viet, Takri, Tamil, +Tangut, Telugu, Thaana, Thai, @@ -885,7 +894,8 @@ Tirhuta, Ugaritic, Vai, Warang_Citi, -Yi. +Yi, +Zanabazar_Square.
Each character has exactly one Unicode general category property, specified by @@ -3445,7 +3455,7 @@ Cambridge, England.
-Last updated: 30 May 2017
+Last updated: 02 July 2017
Copyright © 1997-2017 University of Cambridge.
diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html
index 1b49ef8..aaf8336 100644
--- a/doc/html/pcre2test.html
+++ b/doc/html/pcre2test.html
@@ -568,7 +568,7 @@ Setting compilation options
The following modifiers set options for pcre2_compile(). Most of them set -bits in the options argument of that function, but those whose names start with +bits in the options argument of that function, but those whose names start with PCRE2_EXTRA are additional options that are set in the compile context. For the main options, there are some single-letter abbreviations that are the same as Perl options. There is special handling for /x: if a second x is present, @@ -579,25 +579,25 @@ way pcre2_compile() behaves. See for a description of the effects of these options.
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS - allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED auto_callout set PCRE2_AUTO_CALLOUT - bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL /i caseless set PCRE2_CASELESS dollar_endonly set PCRE2_DOLLAR_ENDONLY /s dotall set PCRE2_DOTALL dupnames set PCRE2_DUPNAMES endanchored set PCRE2_ENDANCHORED /x extended set PCRE2_EXTENDED - /xx extended_more set PCRE2_EXTENDED_MORE + /xx extended_more set PCRE2_EXTENDED_MORE firstline set PCRE2_FIRSTLINE - literal set PCRE2_LITERAL - match_line set PCRE2_EXTRA_MATCH_LINE + literal set PCRE2_LITERAL + match_line set PCRE2_EXTRA_MATCH_LINE match_unset_backref set PCRE2_MATCH_UNSET_BACKREF - match_word set PCRE2_EXTRA_MATCH_WORD + match_word set PCRE2_EXTRA_MATCH_WORD /m multiline set PCRE2_MULTILINE never_backslash_c set PCRE2_NEVER_BACKSLASH_C never_ucp set PCRE2_NEVER_UCP @@ -631,7 +631,7 @@ heavily used in the test files. /B bincode show binary code without lengths callout_info show callout information debug same as info,fullbincode - framesize show matching frame size + framesize show matching frame size fullbincode show binary code with lengths /I info show info about compiled pattern hex unquoted characters are hexadecimal @@ -649,7 +649,7 @@ heavily used in the test files. push push compiled pattern onto the stack pushcopy push a copy onto the stack stackguard=<number> test the stackguard feature - subject_literal treat all subject lines as literal + subject_literal treat all subject lines as literal tables=[0|1|2] select internal tables use_length do not zero-terminate the pattern utf8_input treat input as UTF-8 @@ -720,7 +720,7 @@ not necessarily the last character. 
These lines are omitted if no starting or ending code units are recorded.-The framesize modifier shows the size, in bytes, of the storage frames +The framesize modifier shows the size, in bytes, of the storage frames used by pcre2_match() for handling backtracking. The size depends on the number of capturing parentheses in the pattern.
@@ -972,8 +972,8 @@ below. All other modifiers are either ignored, with a warning message, or cause an error.-The pattern is passed to regcomp() as a zero-terminated string by -default, but if the use_length or hex modifiers are set, the +The pattern is passed to regcomp() as a zero-terminated string by +default, but if the use_length or hex modifiers are set, the REG_PEND extension is used to pass it by length.
@@ -1013,7 +1013,7 @@ are mutually exclusive. Setting certain match controls
-The following modifiers are really subject modifiers, and are described under +The following modifiers are really subject modifiers, and are described under "Subject Modifiers" below. However, they may be included in a pattern's modifier list, in which case they are applied to every subject line that is processed with that pattern. They may not appear in #pattern commands. @@ -1040,9 +1040,9 @@ defaults, set them in a #subject command. Specifying literal subject lines
-If the subject_literal modifier is present on a pattern, all the subject -lines that it matches are taken as literal strings, with no interpretation of -backslashes. It is not possible to set subject modifiers on such lines, but any +If the subject_literal modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any that are set as defaults by a #subject command are recognized.
@@ -1054,7 +1054,8 @@ pushed onto a stack of compiled patterns, and pcre2test expects the next line to contain a new pattern (or a command) instead of a subject line. This facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled patterns" -below. If pushcopy is used instead of push, a copy of the compiled +below. +If pushcopy is used instead of push, a copy of the compiled pattern is stacked, leaving the original as current, ready to match the following input lines. This provides a way of testing the pcre2_code_copy() function. @@ -1103,18 +1104,18 @@ causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec(). The other modifiers are ignored, with a warning message.-There is one additional modifier that can be used with the POSIX wrapper. It is +There is one additional modifier that can be used with the POSIX wrapper. It is ignored (with a warning) if used for non-POSIX matching.
- posix_startend=<n>[:<m>] + posix_startend=<n>[:<m>]This causes the subject string to be passed to regexec() using the REG_STARTEND option, which uses offsets to specify which part of the string is searched. If only one number is given, the end offset is passed as the end of the subject string. For more detail of REG_STARTEND, see the pcre2posix -documentation. If the subject string contains binary zeros (coded as escapes -such as \x{00} because pcre2test does not support actual binary zeros in +documentation. If the subject string contains binary zeros (coded as escapes +such as \x{00} because pcre2test does not support actual binary zeros in its input), you must use posix_startend to specify its length.
@@ -1135,6 +1136,7 @@ pattern. callout_data=<n> set a value to pass via callouts callout_error=<n>[:<m>] control callout error callout_fail=<n>[:<m>] control callout failure + callout_no_where do not show position of a callout callout_none do not supply a callout function copy=<number or name> copy captured substring depth_limit=<n> set a depth limit @@ -1230,29 +1232,10 @@ Testing callouts
A callout function is supplied when pcre2test calls the library matching -functions, unless callout_none is specified. If callout_capture is -set, the current captured groups are output when a callout occurs. The default -return from the callout function is zero, which allows matching to continue. -
--The callout_fail modifier can be given one or two numbers. If there is -only one number, 1 is returned instead of 0 (causing matching to backtrack) -when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1 -is returned when callout <n> is reached and there have been at least <m> -callouts. The callout_error modifier is similar, except that -PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be -aborted. If both these modifiers are set for the same callout number, -callout_error takes precedence. -
--Note that callouts with string arguments are always given the number zero. See -"Callouts" below for a description of the output when a callout it taken. -
--The callout_data modifier can be given an unsigned or a negative number. -This is set as the "user data" that is passed to the matching function, and -passed back when the callout function is invoked. Any value other than zero is -used as a return from pcre2test's callout function. +functions, unless callout_none is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +callout_. Details are given in the section entitled "Callouts" +below.
Finding all matches in a string @@ -1384,7 +1367,7 @@ that is used by the just-in-time optimization code. It is ignored if JIT optimization is not being used. The value is a number of kilobytes. Setting zero reverts to the default of 32K. Providing a stack that is larger than the default is necessary only for very complicated patterns. If jitstack is -set non-zero on a subject line it overrides any value that was set on the +set non-zero on a subject line it overrides any value that was set on the pattern.
@@ -1414,7 +1397,7 @@ The match_limit number is a measure of the amount of backtracking that takes place, and learning the minimum value can be instructive. For most simple matches, the number is quite small, but for patterns with very large numbers of matching possibilities, it can become large very quickly with -increasing length of subject string. +increasing length of subject string.For non-DFA matching, the minimum depth_limit number is a measure of how @@ -1660,7 +1643,7 @@ restart the match with additional subject data by means of the For further information about partial matching, see the pcre2partial documentation. -
+
CALLOUTS
If the pattern contains any callout requests, pcre2test's callout @@ -1669,8 +1652,33 @@ This works with both matching functions.
The callout function in pcre2test returns zero (carry on matching) by -default, but you can use a callout_fail modifier in a subject line (as -described above) to change this and other parameters of the callout. +default, but you can use a callout_fail modifier in a subject line to +change this and other parameters of the callout. +
++If callout_capture is set, the current captured groups are output when a +callout occurs. By default, the callout function then generates output that +indicates where the current match start and matching points are in the subject, +and what the next pattern item is. This output is suppressed if the +callout_no_where modifier is set. +
++The default return from the callout function is zero, which allows matching to +continue. The callout_fail modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The callout_error modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +callout_error takes precedence. Note that callouts with string arguments +are always given the number zero. +
++The callout_data modifier can be given an unsigned or a negative number. +This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from pcre2test's callout function.
Inserting callouts can be helpful when using pcre2test to check @@ -1858,7 +1866,7 @@ Cambridge, England.
REVISION
-Last updated: 16 June 2017 +Last updated: 02 July 2017
Copyright © 1997-2017 University of Cambridge.
diff --git a/doc/pcre2.txt b/doc/pcre2.txt index a72a4fa..6a9bb96 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -3543,9 +3543,14 @@ JUST-IN-TIME COMPILER SUPPORT This support is available only for certain hardware architectures. If this option is set for an unsupported architecture, a building error - occurs. See the pcre2jit documentation for a discussion of JIT usage. - When JIT support is enabled, pcre2grep automatically makes use of it, - unless you add + occurs. If you are running under SELinux you may also want to add + + --enable-jit-sealloc + + which enables the use of an execmem allocator in JIT that is compatible + with SELinux. This has no effect if JIT is not enabled. See the + pcre2jit documentation for a discussion of JIT usage. When JIT support + is enabled, pcre2grep automatically makes use of it, unless you add --disable-pcre2grep-jit @@ -3554,14 +3559,14 @@ JUST-IN-TIME COMPILER SUPPORT NEWLINE RECOGNITION - By default, PCRE2 interprets the linefeed (LF) character as indicating - the end of a line. This is the normal newline character on Unix-like - systems. You can compile PCRE2 to use carriage return (CR) instead, by + By default, PCRE2 interprets the linefeed (LF) character as indicating + the end of a line. This is the normal newline character on Unix-like + systems. You can compile PCRE2 to use carriage return (CR) instead, by adding --enable-newline-is-cr - to the configure command. There is also an --enable-newline-is-lf + to the configure command. There is also an --enable-newline-is-lf option, which explicitly specifies linefeed as the newline character. Alternatively, you can specify that line endings are to be indicated by @@ -3574,104 +3579,104 @@ NEWLINE RECOGNITION --enable-newline-is-anycrlf - which causes PCRE2 to recognize any of the three sequences CR, LF, or + which causes PCRE2 to recognize any of the three sequences CR, LF, or CRLF as indicating a line ending. 
Finally, a fifth option, specified by --enable-newline-is-any - causes PCRE2 to recognize any Unicode newline sequence. The Unicode + causes PCRE2 to recognize any Unicode newline sequence. The Unicode newline sequences are the three just mentioned, plus the single charac- ters VT (vertical tab, U+000B), FF (form feed, U+000C), NEL (next line, - U+0085), LS (line separator, U+2028), and PS (paragraph separator, + U+0085), LS (line separator, U+2028), and PS (paragraph separator, U+2029). Whatever default line ending convention is selected when PCRE2 is built - can be overridden by applications that use the library. At build time + can be overridden by applications that use the library. At build time it is conventional to use the standard for your operating system. WHAT \R MATCHES - By default, the sequence \R in a pattern matches any Unicode newline - sequence, independently of what has been selected as the line ending + By default, the sequence \R in a pattern matches any Unicode newline + sequence, independently of what has been selected as the line ending sequence. If you specify --enable-bsr-anycrlf - the default is changed so that \R matches only CR, LF, or CRLF. What- - ever is selected when PCRE2 is built can be overridden by applications + the default is changed so that \R matches only CR, LF, or CRLF. What- + ever is selected when PCRE2 is built can be overridden by applications that use the library. HANDLING VERY LARGE PATTERNS - Within a compiled pattern, offset values are used to point from one - part to another (for example, from an opening parenthesis to an alter- - nation metacharacter). By default, in the 8-bit and 16-bit libraries, - two-byte values are used for these offsets, leading to a maximum size - for a compiled pattern of around 64K code units. This is sufficient to + Within a compiled pattern, offset values are used to point from one + part to another (for example, from an opening parenthesis to an alter- + nation metacharacter). 
By default, in the 8-bit and 16-bit libraries, + two-byte values are used for these offsets, leading to a maximum size + for a compiled pattern of around 64K code units. This is sufficient to handle all but the most gigantic patterns. Nevertheless, some people do - want to process truly enormous patterns, so it is possible to compile - PCRE2 to use three-byte or four-byte offsets by adding a setting such + want to process truly enormous patterns, so it is possible to compile + PCRE2 to use three-byte or four-byte offsets by adding a setting such as --with-link-size=3 - to the configure command. The value given must be 2, 3, or 4. For the - 16-bit library, a value of 3 is rounded up to 4. In these libraries, - using longer offsets slows down the operation of PCRE2 because it has - to load additional data when handling them. For the 32-bit library the - value is always 4 and cannot be overridden; the value of --with-link- + to the configure command. The value given must be 2, 3, or 4. For the + 16-bit library, a value of 3 is rounded up to 4. In these libraries, + using longer offsets slows down the operation of PCRE2 because it has + to load additional data when handling them. For the 32-bit library the + value is always 4 and cannot be overridden; the value of --with-link- size is ignored. LIMITING PCRE2 RESOURCE USAGE The pcre2_match() function increments a counter each time it goes round - its main loop. Putting a limit on this counter controls the amount of - computing resource used by a single call to pcre2_match(). The limit + its main loop. Putting a limit on this counter controls the amount of + computing resource used by a single call to pcre2_match(). The limit can be changed at run time, as described in the pcre2api documentation. - The default is 10 million, but this can be changed by adding a setting + The default is 10 million, but this can be changed by adding a setting such as --with-match-limit=500000 - to the configure command. 
This setting also applies to the - pcre2_dfa_match() matching function, and to JIT matching (though the + to the configure command. This setting also applies to the + pcre2_dfa_match() matching function, and to JIT matching (though the counting is done differently). - The pcre2_match() function starts out using a 20K vector on the system - stack to record backtracking points. The more nested backtracking + The pcre2_match() function starts out using a 20K vector on the system + stack to record backtracking points. The more nested backtracking points there are (that is, the deeper the search tree), the more memory - is needed. If the initial vector is not large enough, heap memory is + is needed. If the initial vector is not large enough, heap memory is used, up to a certain limit, which is specified in kilobytes. The limit can be changed at run time, as described in the pcre2api documentation. - The default limit (in effect unlimited) is 20 million. You can change + The default limit (in effect unlimited) is 20 million. You can change this by a setting such as --with-heap-limit=500 - which limits the amount of heap to 500 kilobytes. This limit applies - only to interpretive matching in pcre2_match(). It does not apply when - JIT (which has its own memory arrangements) is used, nor does it apply + which limits the amount of heap to 500 kilobytes. This limit applies + only to interpretive matching in pcre2_match(). It does not apply when + JIT (which has its own memory arrangements) is used, nor does it apply to pcre2_dfa_match(). - You can also explicitly limit the depth of nested backtracking in the + You can also explicitly limit the depth of nested backtracking in the pcre2_match() interpreter. This limit defaults to the value that is set - for --with-match-limit. You can set a lower default limit by adding, + for --with-match-limit. You can set a lower default limit by adding, for example, --with-match-limit_depth=10000 - to the configure command. 
This value can be overridden at run time. - This depth limit indirectly limits the amount of heap memory that is - used, but because the size of each backtracking "frame" depends on the - number of capturing parentheses in a pattern, the amount of heap that - is used before the limit is reached varies from pattern to pattern. - This limit was more useful in versions before 10.30, where function - recursion was used for backtracking. However, as well as applying to + to the configure command. This value can be overridden at run time. + This depth limit indirectly limits the amount of heap memory that is + used, but because the size of each backtracking "frame" depends on the + number of capturing parentheses in a pattern, the amount of heap that + is used before the limit is reached varies from pattern to pattern. + This limit was more useful in versions before 10.30, where function + recursion was used for backtracking. However, as well as applying to pcre2_match(), this limit also controls the depth of recursive function - calls in pcre2_dfa_match(). These are used for lookaround assertions, + calls in pcre2_dfa_match(). These are used for lookaround assertions, atomic groups, and recursion within patterns. The limit does not apply to JIT matching. @@ -3680,45 +3685,45 @@ CREATING CHARACTER TABLES AT BUILD TIME PCRE2 uses fixed tables for processing characters whose code points are less than 256. By default, PCRE2 is built with a set of tables that are - distributed in the file src/pcre2_chartables.c.dist. These tables are + distributed in the file src/pcre2_chartables.c.dist. These tables are for ASCII codes only. If you add --enable-rebuild-chartables - to the configure command, the distributed tables are no longer used. - Instead, a program called dftables is compiled and run. This outputs + to the configure command, the distributed tables are no longer used. + Instead, a program called dftables is compiled and run. 
This outputs the source for new set of tables, created in the default locale of your C run-time system. This method of replacing the tables does not work if - you are cross compiling, because dftables is run on the local host. If - you need to create alternative tables when cross compiling, you will + you are cross compiling, because dftables is run on the local host. If + you need to create alternative tables when cross compiling, you will have to do so "by hand". USING EBCDIC CODE - PCRE2 assumes by default that it will run in an environment where the - character code is ASCII or Unicode, which is a superset of ASCII. This + PCRE2 assumes by default that it will run in an environment where the + character code is ASCII or Unicode, which is a superset of ASCII. This is the case for most computer operating systems. PCRE2 can, however, be compiled to run in an 8-bit EBCDIC environment by adding --enable-ebcdic --disable-unicode to the configure command. This setting implies --enable-rebuild-charta- - bles. You should only use it if you know that you are in an EBCDIC + bles. You should only use it if you know that you are in an EBCDIC environment (for example, an IBM mainframe operating system). - It is not possible to support both EBCDIC and UTF-8 codes in the same - version of the library. Consequently, --enable-unicode and --enable- + It is not possible to support both EBCDIC and UTF-8 codes in the same + version of the library. Consequently, --enable-unicode and --enable- ebcdic are mutually exclusive. The EBCDIC character that corresponds to an ASCII LF is assumed to have - the value 0x15 by default. However, in some EBCDIC environments, 0x25 + the value 0x15 by default. However, in some EBCDIC environments, 0x25 is used. In such an environment you should use --enable-ebcdic-nl25 as well as, or instead of, --enable-ebcdic. The EBCDIC character for CR - has the same value as in ASCII, namely, 0x0d. 
Whichever of 0x15 and + has the same value as in ASCII, namely, 0x0d. Whichever of 0x15 and 0x25 is not chosen as LF is made to correspond to the Unicode NEL char- acter (which, in Unicode, is 0x85). @@ -3731,34 +3736,34 @@ PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS By default, on non-Windows systems, pcre2grep supports the use of call- outs with string arguments within the patterns it is matching, in order - to run external scripts. For details, see the pcre2grep documentation. - This support can be disabled by adding --disable-pcre2grep-callout to + to run external scripts. For details, see the pcre2grep documentation. + This support can be disabled by adding --disable-pcre2grep-callout to the configure command. PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT - By default, pcre2grep reads all files as plain text. You can build it - so that it recognizes files whose names end in .gz or .bz2, and reads + By default, pcre2grep reads all files as plain text. You can build it + so that it recognizes files whose names end in .gz or .bz2, and reads them with libz or libbz2, respectively, by adding one or both of --enable-pcre2grep-libz --enable-pcre2grep-libbz2 to the configure command. These options naturally require that the rel- - evant libraries are installed on your system. Configuration will fail + evant libraries are installed on your system. Configuration will fail if they are not. PCRE2GREP BUFFER SIZE - pcre2grep uses an internal buffer to hold a "window" on the file it is + pcre2grep uses an internal buffer to hold a "window" on the file it is scanning, in order to be able to output "before" and "after" lines when - it finds a match. The starting size of the buffer is controlled by a - parameter whose default value is 20K. The buffer itself is three times - this size, but because of the way it is used for holding "before" - lines, the longest line that is guaranteed to be processable is the - parameter size. 
If a longer line is encountered, pcre2grep automati- + it finds a match. The starting size of the buffer is controlled by a + parameter whose default value is 20K. The buffer itself is three times + this size, but because of the way it is used for holding "before" + lines, the longest line that is guaranteed to be processable is the + parameter size. If a longer line is encountered, pcre2grep automati- cally expands the buffer, up to a specified maximum size, whose default is 1M or the starting size, whichever is the larger. You can change the default parameter values by adding, for example, @@ -3766,8 +3771,8 @@ PCRE2GREP BUFFER SIZE --with-pcre2grep-bufsize=51200 --with-pcre2grep-max-bufsize=2097152 - to the configure command. The caller of pcre2grep can override these - values by using --buffer-size and --max-buffer-size on the command + to the configure command. The caller of pcre2grep can override these + values by using --buffer-size and --max-buffer-size on the command line. @@ -3778,26 +3783,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT --enable-pcre2test-libreadline --enable-pcre2test-libedit - to the configure command, pcre2test is linked with the libreadline + to the configure command, pcre2test is linked with the libreadline orlibedit library, respectively, and when its input is from a terminal, - it reads it using the readline() function. This provides line-editing - and history facilities. Note that libreadline is GPL-licensed, so if - you distribute a binary of pcre2test linked in this way, there may be + it reads it using the readline() function. This provides line-editing + and history facilities. Note that libreadline is GPL-licensed, so if + you distribute a binary of pcre2test linked in this way, there may be licensing issues. These can be avoided by linking instead with libedit, which has a BSD licence. - Setting --enable-pcre2test-libreadline causes the -lreadline option to - be added to the pcre2test build. 
In many operating environments with a - sytem-installed readline library this is sufficient. However, in some + Setting --enable-pcre2test-libreadline causes the -lreadline option to + be added to the pcre2test build. In many operating environments with a + sytem-installed readline library this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is - in use), some extra configuration may be necessary. The INSTALL file + in use), some extra configuration may be necessary. The INSTALL file for libreadline says this: "Readline uses the termcap functions, but does not link with the termcap or curses library itself, allowing applications which link with readline the to choose an appropriate library." - If your environment has not been set up so that an appropriate library + If your environment has not been set up so that an appropriate library is automatically included, you may need to add something like LIBS="-ncurses" @@ -3811,7 +3816,7 @@ INCLUDING DEBUGGING CODE --enable-debug - to the configure command, additional debugging code is included in the + to the configure command, additional debugging code is included in the build. This feature is intended for use by the PCRE2 maintainers. @@ -3821,15 +3826,15 @@ DEBUGGING WITH VALGRIND SUPPORT --enable-valgrind - to the configure command, PCRE2 will use valgrind annotations to mark - certain memory regions as unaddressable. This allows it to detect - invalid memory accesses, and is mostly useful for debugging PCRE2 + to the configure command, PCRE2 will use valgrind annotations to mark + certain memory regions as unaddressable. This allows it to detect + invalid memory accesses, and is mostly useful for debugging PCRE2 itself. CODE COVERAGE REPORTING - If your C compiler is gcc, you can build a version of PCRE2 that can + If your C compiler is gcc, you can build a version of PCRE2 that can generate a code coverage report for its test suite. 
To enable this, you must install lcov version 1.6 or above. Then specify @@ -3838,20 +3843,20 @@ CODE COVERAGE REPORTING to the configure command and build PCRE2 in the usual way. Note that using ccache (a caching C compiler) is incompatible with code - coverage reporting. If you have configured ccache to run automatically + coverage reporting. If you have configured ccache to run automatically on your system, you must set the environment variable CCACHE_DISABLE=1 before running make to build PCRE2, so that ccache is not used. - When --enable-coverage is used, the following addition targets are + When --enable-coverage is used, the following addition targets are added to the Makefile: make coverage - This creates a fresh coverage report for the PCRE2 test suite. It is - equivalent to running "make coverage-reset", "make coverage-baseline", + This creates a fresh coverage report for the PCRE2 test suite. It is + equivalent to running "make coverage-reset", "make coverage-baseline", "make check", and then "make coverage-report". make coverage-reset @@ -3868,56 +3873,56 @@ CODE COVERAGE REPORTING make coverage-clean-report - This removes the generated coverage report without cleaning the cover- + This removes the generated coverage report without cleaning the cover- age data itself. make coverage-clean-data - This removes the captured coverage data without removing the coverage + This removes the captured coverage data without removing the coverage files created at compile time (*.gcno). make coverage-clean - This cleans all coverage data including the generated coverage report. - For more information about code coverage, see the gcov and lcov docu- + This cleans all coverage data including the generated coverage report. + For more information about code coverage, see the gcov and lcov docu- mentation. 
SUPPORT FOR FUZZERS - There is a special option for use by people who want to run fuzzing + There is a special option for use by people who want to run fuzzing tests on PCRE2: --enable-fuzz-support At present this applies only to the 8-bit library. If set, it causes an - extra library called libpcre2-fuzzsupport.a to be built, but not - installed. This contains a single function called LLVMFuzzerTestOneIn- - put() whose arguments are a pointer to a string and the length of the - string. When called, this function tries to compile the string as a - pattern, and if that succeeds, to match it. This is done both with no - options and with some random options bits that are generated from the + extra library called libpcre2-fuzzsupport.a to be built, but not + installed. This contains a single function called LLVMFuzzerTestOneIn- + put() whose arguments are a pointer to a string and the length of the + string. When called, this function tries to compile the string as a + pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. - Setting --enable-fuzz-support also causes a binary called pcre2fuz- - zcheck to be created. This is normally run under valgrind or used when + Setting --enable-fuzz-support also causes a binary called pcre2fuz- + zcheck to be created. This is normally run under valgrind or used when PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing - function and outputs information about it is doing. The input strings - are specified by arguments: if an argument starts with "=" the rest of - it is a literal input string. Otherwise, it is assumed to be a file + function and outputs information about what it is doing. The input strings + are specified by arguments: if an argument starts with "=" the rest of + it is a literal input string. Otherwise, it is assumed to be a file name, and the contents of the file are the test string. 
OBSOLETE OPTION - In versions of PCRE2 prior to 10.30, there were two ways of handling - backtracking in the pcre2_match() function. The default was to use the + In versions of PCRE2 prior to 10.30, there were two ways of handling + backtracking in the pcre2_match() function. The default was to use the system stack, but if --disable-stack-for-recursion - was set, memory on the heap was used. From release 10.30 onwards this - has changed (the stack is no longer used) and this option now does + was set, memory on the heap was used. From release 10.30 onwards this + has changed (the stack is no longer used) and this option now does nothing except give a warning. @@ -3935,7 +3940,7 @@ AUTHOR REVISION - Last updated: 30 May 2017 + Last updated: 17 June 2017 Copyright (c) 1997-2017 University of Cambridge. ------------------------------------------------------------------------------ @@ -6309,26 +6314,28 @@ BACKSLASH Those that are not part of an identified script are lumped together as "Common". 
The current list of scripts is: - Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Balinese, - Bamum, Bassa_Vah, Batak, Bengali, Bopomofo, Brahmi, Braille, Buginese, - Buhid, Canadian_Aboriginal, Carian, Caucasian_Albanian, Chakma, Cham, - Cherokee, Common, Coptic, Cuneiform, Cypriot, Cyrillic, Deseret, - Devanagari, Duployan, Egyptian_Hieroglyphs, Elbasan, Ethiopic, Geor- - gian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gurmukhi, Han, - Hangul, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Aramaic, Inherited, - Inscriptional_Pahlavi, Inscriptional_Parthian, Javanese, Kaithi, Kan- - nada, Katakana, Kayah_Li, Kharoshthi, Khmer, Khojki, Khudawadi, Lao, - Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Maha- - jani, Malayalam, Mandaic, Manichaean, Meetei_Mayek, Mende_Kikakui, - Meroitic_Cursive, Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, - Multani, Myanmar, Nabataean, New_Tai_Lue, Nko, Ogham, Ol_Chiki, - Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, - Old_South_Arabian, Old_Turkic, Oriya, Osmanya, Pahawh_Hmong, Palmyrene, - Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang, Runic, - Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting, Sinhala, - Sora_Sompeng, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, - Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Telugu, Thaana, Thai, - Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, Warang_Citi, Yi. 
+ Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali- + nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi, + Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba- + nian, Chakma, Cham, Cherokee, Common, Coptic, Cuneiform, Cypriot, + Cyrillic, Deseret, Devanagari, Duployan, Egyptian_Hieroglyphs, Elbasan, + Ethiopic, Georgian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gur- + mukhi, Han, Hangul, Hanunoo, Hatran, Hebrew, Hiragana, Imperial_Ara- + maic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian, + Javanese, Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Kho- + jki, Khudawadi, Lao, Latin, Lepcha, Limbu, Linear_A, Linear_B, Lisu, + Lycian, Lydian, Mahajani, Malayalam, Mandaic, Manichaean, Marchen, + Masaram_Gondi, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, + Meroitic_Hieroglyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, + Nabataean, New_Tai_Lue, Newa, Nko, Nushu, Ogham, Ol_Chiki, Old_Hungar- + ian, Old_Italic, Old_North_Arabian, Old_Permic, Old_Persian, + Old_South_Arabian, Old_Turkic, Oriya, Osage, Osmanya, Pahawh_Hmong, + Palmyrene, Pau_Cin_Hau, Phags_Pa, Phoenician, Psalter_Pahlavi, Rejang, + Runic, Samaritan, Saurashtra, Sharada, Shavian, Siddham, SignWriting, + Sinhala, Sora_Sompeng, Soyombo, Sundanese, Syloti_Nagri, Syriac, Taga- + log, Tagbanwa, Tai_Le, Tai_Tham, Tai_Viet, Takri, Tamil, Tangut, Tel- + ugu, Thaana, Thai, Tibetan, Tifinagh, Tirhuta, Ugaritic, Vai, + Warang_Citi, Yi, Zanabazar_Square. Each character has exactly one Unicode general category property, spec- ified by a two-letter abbreviation. For compatibility with Perl, nega- @@ -8737,7 +8744,7 @@ AUTHOR REVISION - Last updated: 30 May 2017 + Last updated: 02 July 2017 Copyright (c) 1997-2017 University of Cambridge. 
------------------------------------------------------------------------------ diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index bafe1cd..42ab96b 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "30 May 2017" "PCRE2 10.30" +.TH PCRE2PATTERN 3 "02 July 2017" "PCRE2 10.30" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -754,6 +754,7 @@ example: Those that are not part of an identified script are lumped together as "Common". The current list of scripts is: .P +Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, @@ -764,6 +765,7 @@ Bamum, Bassa_Vah, Batak, Bengali, +Bhaiksuki, Bopomofo, Brahmi, Braille, @@ -825,6 +827,8 @@ Mahajani, Malayalam, Mandaic, Manichaean, +Marchen, +Masaram_Gondi, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, @@ -837,7 +841,9 @@ Multani, Myanmar, Nabataean, New_Tai_Lue, +Newa, Nko, +Nushu, Ogham, Ol_Chiki, Old_Hungarian, @@ -848,6 +854,7 @@ Old_Persian, Old_South_Arabian, Old_Turkic, Oriya, +Osage, Osmanya, Pahawh_Hmong, Palmyrene, @@ -865,6 +872,7 @@ Siddham, SignWriting, Sinhala, Sora_Sompeng, +Soyombo, Sundanese, Syloti_Nagri, Syriac, @@ -875,6 +883,7 @@ Tai_Tham, Tai_Viet, Takri, Tamil, +Tangut, Telugu, Thaana, Thai, @@ -884,7 +893,8 @@ Tirhuta, Ugaritic, Vai, Warang_Citi, -Yi. +Yi, +Zanabazar_Square. .P Each character has exactly one Unicode general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be @@ -3475,6 +3485,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 May 2017 +Last updated: 02 July 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index 39b3a16..c13188c 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "16 June 2017" "PCRE 10.30" +.TH PCRE2TEST 1 "02 July 2017" "PCRE 10.30" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. 
.SH SYNOPSIS @@ -527,7 +527,7 @@ by a previous \fB#pattern\fP command. .rs .sp The following modifiers set options for \fBpcre2_compile()\fP. Most of them set -bits in the options argument of that function, but those whose names start with +bits in the options argument of that function, but those whose names start with PCRE2_EXTRA are additional options that are set in the compile context. For the main options, there are some single-letter abbreviations that are the same as Perl options. There is special handling for /x: if a second x is present, @@ -540,25 +540,25 @@ way \fBpcre2_compile()\fP behaves. See for a description of the effects of these options. .sp allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS - allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES + allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES alt_bsux set PCRE2_ALT_BSUX alt_circumflex set PCRE2_ALT_CIRCUMFLEX alt_verbnames set PCRE2_ALT_VERBNAMES anchored set PCRE2_ANCHORED auto_callout set PCRE2_AUTO_CALLOUT - bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL + bad_escape_is_literal set PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL /i caseless set PCRE2_CASELESS dollar_endonly set PCRE2_DOLLAR_ENDONLY /s dotall set PCRE2_DOTALL dupnames set PCRE2_DUPNAMES endanchored set PCRE2_ENDANCHORED /x extended set PCRE2_EXTENDED - /xx extended_more set PCRE2_EXTENDED_MORE + /xx extended_more set PCRE2_EXTENDED_MORE firstline set PCRE2_FIRSTLINE - literal set PCRE2_LITERAL - match_line set PCRE2_EXTRA_MATCH_LINE + literal set PCRE2_LITERAL + match_line set PCRE2_EXTRA_MATCH_LINE match_unset_backref set PCRE2_MATCH_UNSET_BACKREF - match_word set PCRE2_EXTRA_MATCH_WORD + match_word set PCRE2_EXTRA_MATCH_WORD /m multiline set PCRE2_MULTILINE never_backslash_c set PCRE2_NEVER_BACKSLASH_C never_ucp set PCRE2_NEVER_UCP @@ -593,7 +593,7 @@ heavily used in the test files. 
/B bincode show binary code without lengths callout_info show callout information debug same as info,fullbincode - framesize show matching frame size + framesize show matching frame size fullbincode show binary code with lengths /I info show info about compiled pattern hex unquoted characters are hexadecimal @@ -611,7 +611,7 @@ heavily used in the test files. push push compiled pattern onto the stack pushcopy push a copy onto the stack stackguard=test the stackguard feature - subject_literal treat all subject lines as literal + subject_literal treat all subject lines as literal tables=[0|1|2] select internal tables use_length do not zero-terminate the pattern utf8_input treat input as UTF-8 @@ -677,7 +677,7 @@ unit" is the last literal code unit that must be present in any match. This is not necessarily the last character. These lines are omitted if no starting or ending code units are recorded. .P -The \fBframesize\fP modifier shows the size, in bytes, of the storage frames +The \fBframesize\fP modifier shows the size, in bytes, of the storage frames used by \fBpcre2_match()\fP for handling backtracking. The size depends on the number of capturing parentheses in the pattern. .P @@ -934,8 +934,8 @@ The \fBaftertext\fP and \fBallaftertext\fP subject modifiers work as described below. All other modifiers are either ignored, with a warning message, or cause an error. .P -The pattern is passed to \fBregcomp()\fP as a zero-terminated string by -default, but if the \fBuse_length\fP or \fBhex\fP modifiers are set, the +The pattern is passed to \fBregcomp()\fP as a zero-terminated string by +default, but if the \fBuse_length\fP or \fBhex\fP modifiers are set, the REG_PEND extension is used to pass it by length. . . @@ -977,7 +977,7 @@ are mutually exclusive. 
.SS "Setting certain match controls" .rs .sp -The following modifiers are really subject modifiers, and are described under +The following modifiers are really subject modifiers, and are described under "Subject Modifiers" below. However, they may be included in a pattern's modifier list, in which case they are applied to every subject line that is processed with that pattern. They may not appear in \fB#pattern\fP commands. @@ -1004,9 +1004,9 @@ defaults, set them in a \fB#subject\fP command. .SS "Specifying literal subject lines" .rs .sp -If the \fBsubject_literal\fP modifier is present on a pattern, all the subject -lines that it matches are taken as literal strings, with no interpretation of -backslashes. It is not possible to set subject modifiers on such lines, but any +If the \fBsubject_literal\fP modifier is present on a pattern, all the subject +lines that it matches are taken as literal strings, with no interpretation of +backslashes. It is not possible to set subject modifiers on such lines, but any that are set as defaults by a \fB#subject\fP command are recognized. . . @@ -1020,7 +1020,9 @@ facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled patterns" .\" HTML .\" -below. If \fBpushcopy\fP is used instead of \fBpush\fP, a copy of the compiled +below. +.\" +If \fBpushcopy\fP is used instead of \fBpush\fP, a copy of the compiled pattern is stacked, leaving the original as current, ready to match the following input lines. This provides a way of testing the \fBpcre2_code_copy()\fP function. @@ -1073,10 +1075,10 @@ that have any effect are \fBnotbol\fP, \fBnotempty\fP, and \fBnoteol\fP, causing REG_NOTBOL, REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to \fBregexec()\fP. The other modifiers are ignored, with a warning message. .P -There is one additional modifier that can be used with the POSIX wrapper. 
It is +There is one additional modifier that can be used with the POSIX wrapper. It is ignored (with a warning) if used for non-POSIX matching. .sp - posix_startend= [: ] + posix_startend= [: ] .sp This causes the subject string to be passed to \fBregexec()\fP using the REG_STARTEND option, which uses offsets to specify which part of the string is @@ -1085,8 +1087,8 @@ the subject string. For more detail of REG_STARTEND, see the .\" HREF \fBpcre2posix\fP .\" -documentation. If the subject string contains binary zeros (coded as escapes -such as \ex{00} because \fBpcre2test\fP does not support actual binary zeros in +documentation. If the subject string contains binary zeros (coded as escapes +such as \ex{00} because \fBpcre2test\fP does not support actual binary zeros in its input), you must use \fBposix_startend\fP to specify its length. . . @@ -1107,6 +1109,7 @@ pattern. callout_data= set a value to pass via callouts callout_error= [: ] control callout error callout_fail= [: ] control callout failure + callout_no_where do not show position of a callout callout_none do not supply a callout function copy= copy captured substring depth_limit= set a depth limit @@ -1200,26 +1203,13 @@ does no capturing); it is ignored, with a warning message, if present. .rs .sp A callout function is supplied when \fBpcre2test\fP calls the library matching -functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is -set, the current captured groups are output when a callout occurs. The default -return from the callout function is zero, which allows matching to continue. -.P -The \fBcallout_fail\fP modifier can be given one or two numbers. If there is -only one number, 1 is returned instead of 0 (causing matching to backtrack) -when a callout of that number is reached. If two numbers ( : ) are given, 1 -is returned when callout is reached and there have been at least -callouts. 
The \fBcallout_error\fP modifier is similar, except that -PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be -aborted. If both these modifiers are set for the same callout number, -\fBcallout_error\fP takes precedence. -.P -Note that callouts with string arguments are always given the number zero. See -"Callouts" below for a description of the output when a callout it taken. -.P -The \fBcallout_data\fP modifier can be given an unsigned or a negative number. -This is set as the "user data" that is passed to the matching function, and -passed back when the callout function is invoked. Any value other than zero is -used as a return from \fBpcre2test\fP's callout function. +functions, unless \fBcallout_none\fP is specified. Its behaviour can be +controlled by various modifiers listed above whose names begin with +\fBcallout_\fP. Details are given in the section entitled "Callouts" +.\" HTML +.\" +below. +.\" . . .SS "Finding all matches in a string" @@ -1344,7 +1334,7 @@ that is used by the just-in-time optimization code. It is ignored if JIT optimization is not being used. The value is a number of kilobytes. Setting zero reverts to the default of 32K. Providing a stack that is larger than the default is necessary only for very complicated patterns. If \fBjitstack\fP is -set non-zero on a subject line it overrides any value that was set on the +set non-zero on a subject line it overrides any value that was set on the pattern. . . @@ -1372,7 +1362,7 @@ The \fImatch_limit\fP number is a measure of the amount of backtracking that takes place, and learning the minimum value can be instructive. For most simple matches, the number is quite small, but for patterns with very large numbers of matching possibilities, it can become large very quickly with -increasing length of subject string. +increasing length of subject string. 
.P For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how much nested backtracking happens (that is, how deeply the pattern's tree is @@ -1625,6 +1615,7 @@ For further information about partial matching, see the documentation. . . +.\" HTML .SH CALLOUTS .rs .sp @@ -1633,8 +1624,30 @@ function is called during matching unless \fBcallout_none\fP is specified. This works with both matching functions. .P The callout function in \fBpcre2test\fP returns zero (carry on matching) by -default, but you can use a \fBcallout_fail\fP modifier in a subject line (as -described above) to change this and other parameters of the callout. +default, but you can use a \fBcallout_fail\fP modifier in a subject line to +change this and other parameters of the callout. +.P +If \fBcallout_capture\fP is set, the current captured groups are output when a +callout occurs. By default, the callout function then generates output that +indicates where the current match start and matching points are in the subject, +and what the next pattern item is. This output is suppressed if the +\fBcallout_no_where\fP modifier is set. +.P +The default return from the callout function is zero, which allows matching to +continue. The \fBcallout_fail\fP modifier can be given one or two numbers. If +there is only one number, 1 is returned instead of 0 (causing matching to +backtrack) when a callout of that number is reached. If two numbers (<n>:<m>) +are given, 1 is returned when callout <n> is reached and there have been at +least <m> callouts. The \fBcallout_error\fP modifier is similar, except that +PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be +aborted. If both these modifiers are set for the same callout number, +\fBcallout_error\fP takes precedence. Note that callouts with string arguments +are always given the number zero. +.P +The \fBcallout_data\fP modifier can be given an unsigned or a negative number. 
+This is set as the "user data" that is passed to the matching function, and +passed back when the callout function is invoked. Any value other than zero is +used as a return from \fBpcre2test\fP's callout function. .P Inserting callouts can be helpful when using \fBpcre2test\fP to check complicated regular expressions. For further information about callouts, see @@ -1837,6 +1850,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 16 June 2017 +Last updated: 02 July 2017 Copyright (c) 1997-2017 University of Cambridge. .fi diff --git a/doc/pcre2test.txt b/doc/pcre2test.txt index 32ac6d6..c1a8302 100644 --- a/doc/pcre2test.txt +++ b/doc/pcre2test.txt @@ -943,7 +943,7 @@ PATTERN MODIFIERS next line to contain a new pattern (or a command) instead of a subject line. This facility is used when saving compiled patterns to a file, as described in the section entitled "Saving and restoring compiled pat- - terns" below. If pushcopy is used instead of push, a copy of the com- + terns" below. If pushcopy is used instead of push, a copy of the com- piled pattern is stacked, leaving the original as current, ready to match the following input lines. This provides a way of testing the pcre2_code_copy() function. The push and pushcopy modifiers are @@ -1016,6 +1016,7 @@ SUBJECT MODIFIERS callout_data= set a value to pass via callouts callout_error= [: ] control callout error callout_fail= [: ] control callout failure + callout_no_where do not show position of a callout callout_none do not supply a callout function copy= copy captured substring depth_limit= set a depth limit @@ -1107,29 +1108,9 @@ SUBJECT MODIFIERS Testing callouts A callout function is supplied when pcre2test calls the library match- - ing functions, unless callout_none is specified. If callout_capture is - set, the current captured groups are output when a callout occurs. The - default return from the callout function is zero, which allows matching - to continue. 
- - The callout_fail modifier can be given one or two numbers. If there is - only one number, 1 is returned instead of 0 (causing matching to back- - track) when a callout of that number is reached. If two numbers - ( : ) are given, 1 is returned when callout is reached and - there have been at least callouts. The callout_error modifier is - similar, except that PCRE2_ERROR_CALLOUT is returned, causing the - entire matching process to be aborted. If both these modifiers are set - for the same callout number, callout_error takes precedence. - - Note that callouts with string arguments are always given the number - zero. See "Callouts" below for a description of the output when a call- - out it taken. - - The callout_data modifier can be given an unsigned or a negative num- - ber. This is set as the "user data" that is passed to the matching - function, and passed back when the callout function is invoked. Any - value other than zero is used as a return from pcre2test's callout - function. + ing functions, unless callout_none is specified. Its behaviour can be + controlled by various modifiers listed above whose names begin with + callout_. Details are given in the section entitled "Callouts" below. Finding all matches in a string @@ -1511,8 +1492,32 @@ CALLOUTS works with both matching functions. The callout function in pcre2test returns zero (carry on matching) by - default, but you can use a callout_fail modifier in a subject line (as - described above) to change this and other parameters of the callout. + default, but you can use a callout_fail modifier in a subject line to + change this and other parameters of the callout. + + If callout_capture is set, the current captured groups are output when + a callout occurs. By default, the callout function then generates out- + put that indicates where the current match start and matching points + are in the subject, and what the next pattern item is. This output is + suppressed if the callout_no_where modifier is set. 
+ + The default return from the callout function is zero, which allows + matching to continue. The callout_fail modifier can be given one or two + numbers. If there is only one number, 1 is returned instead of 0 (caus- + ing matching to backtrack) when a callout of that number is reached. If + two numbers (<n>:<m>) are given, 1 is returned when callout <n> is + reached and there have been at least <m> callouts. The callout_error + modifier is similar, except that PCRE2_ERROR_CALLOUT is returned, caus- + ing the entire matching process to be aborted. If both these modifiers + are set for the same callout number, callout_error takes precedence. + Note that callouts with string arguments are always given the number + zero. + + The callout_data modifier can be given an unsigned or a negative num- + ber. This is set as the "user data" that is passed to the matching + function, and passed back when the callout function is invoked. Any + value other than zero is used as a return from pcre2test's callout + function. Inserting callouts can be helpful when using pcre2test to check compli- cated regular expressions. For further information about callouts, see @@ -1687,5 +1692,5 @@ AUTHOR REVISION - Last updated: 16 June 2017 + Last updated: 02 July 2017 Copyright (c) 1997-2017 University of Cambridge. diff --git a/maint/GenerateUtt.py b/maint/GenerateUtt.py index 94cb525..a152566 100755 --- a/maint/GenerateUtt.py +++ b/maint/GenerateUtt.py @@ -23,6 +23,7 @@ # Script updated to Python 3 by running it through the 2to3 converter. # Added script names for Unicode 7.0.0, 20-June-2014. # Added script names for Unicode 8.0.0, 19-June-2015. +# Added script names for Unicode 10.0.0, 02-July-2017. 
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ @@ -51,7 +52,10 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi', # New for Unicode 8.0.0 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian', - 'SignWriting' + 'SignWriting', +# New for Unicode 10.0.0 + 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi', + 'Nushu', 'Soyombo', 'Zanabazar_Square' ] category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', diff --git a/maint/MultiStage2.py b/maint/MultiStage2.py index 0649332..f124538 100755 --- a/maint/MultiStage2.py +++ b/maint/MultiStage2.py @@ -122,6 +122,7 @@ # 20-June-2014: Updated for Unicode 7.0.0 # 12-August-2014: Updated to put Unicode version into the file # 19-June-2015: Updated for Unicode 8.0.0 +# 02-July-2017: Updated for Unicode 10.0.0 ############################################################################## @@ -335,7 +336,10 @@ script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Bugines 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi', # New for Unicode 8.0.0 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian', - 'SignWriting' + 'SignWriting', +# New for Unicode 10.0.0 + 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi', + 'Nushu', 'Soyombo', 'Zanabazar_Square' ] category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', @@ -343,7 +347,8 @@ category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ] break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend', - 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other' ] + 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 
'Regional_Indicator', 'Other', + 'E_Base', 'E_Modifier', 'E_Base_GAZ', 'ZWJ', 'Glue_After_Zwj' ] test_record_size() unicode_version = "" diff --git a/maint/Unicode.tables/CaseFolding.txt b/maint/Unicode.tables/CaseFolding.txt index 0197a6c..efdf18e 100644 --- a/maint/Unicode.tables/CaseFolding.txt +++ b/maint/Unicode.tables/CaseFolding.txt @@ -1,10 +1,11 @@ -# CaseFolding-8.0.0.txt -# Date: 2015-01-13, 18:16:36 GMT [MD] +# CaseFolding-10.0.0.txt +# Date: 2017-04-14, 05:40:18 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # # Case Folding Properties # @@ -23,7 +24,7 @@ # # NOTE: case folding does not preserve normalization formats! # -# For information on case folding, including how to have case folding +# For information on case folding, including how to have case folding # preserve normalization formats, see Section 3.13 Default Case Algorithms in # The Unicode Standard. 
# @@ -593,6 +594,15 @@ 13FB; C; 13F3; # CHEROKEE SMALL LETTER YU 13FC; C; 13F4; # CHEROKEE SMALL LETTER YV 13FD; C; 13F5; # CHEROKEE SMALL LETTER MV +1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE +1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE +1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O +1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES +1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE +1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE +1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN +1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT +1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW @@ -1163,6 +1173,7 @@ A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT +A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL @@ -1327,6 +1338,42 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 10426; C; 1044E; # DESERET CAPITAL LETTER OI 10427; C; 1044F; # DESERET CAPITAL LETTER EW +104B0; C; 104D8; # OSAGE CAPITAL LETTER A +104B1; C; 104D9; # OSAGE CAPITAL LETTER AI +104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN +104B3; C; 104DB; # OSAGE CAPITAL LETTER AH +104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA +104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA +104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA +104B7; C; 104DF; # OSAGE CAPITAL LETTER E +104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN +104B9; C; 104E1; # OSAGE CAPITAL LETTER HA +104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA +104BB; C; 104E3; # OSAGE CAPITAL LETTER I +104BC; C; 104E4; # OSAGE CAPITAL LETTER KA 
+104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA +104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA +104BF; C; 104E7; # OSAGE CAPITAL LETTER LA +104C0; C; 104E8; # OSAGE CAPITAL LETTER MA +104C1; C; 104E9; # OSAGE CAPITAL LETTER NA +104C2; C; 104EA; # OSAGE CAPITAL LETTER O +104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN +104C4; C; 104EC; # OSAGE CAPITAL LETTER PA +104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA +104C6; C; 104EE; # OSAGE CAPITAL LETTER SA +104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA +104C8; C; 104F0; # OSAGE CAPITAL LETTER TA +104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA +104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA +104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA +104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA +104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA +104CE; C; 104F6; # OSAGE CAPITAL LETTER U +104CF; C; 104F7; # OSAGE CAPITAL LETTER WA +104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA +104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA +104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA +104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB @@ -1410,5 +1457,39 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO +1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF +1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI +1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM +1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM +1E904; C; 1E926; # ADLAM CAPITAL LETTER BA +1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE +1E906; C; 1E928; # ADLAM CAPITAL LETTER PE +1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE +1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA +1E909; C; 1E92B; # ADLAM CAPITAL LETTER E +1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA +1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I +1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O 
+1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA +1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE +1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW +1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN +1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF +1E912; C; 1E934; # ADLAM CAPITAL LETTER YA +1E913; C; 1E935; # ADLAM CAPITAL LETTER U +1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM +1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI +1E916; C; 1E938; # ADLAM CAPITAL LETTER HA +1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF +1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA +1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA +1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU +1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA +1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA +1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA +1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE +1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL +1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO +1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA # # EOF diff --git a/maint/Unicode.tables/DerivedGeneralCategory.txt b/maint/Unicode.tables/DerivedGeneralCategory.txt index 77c9b38..bc7f5e8 100644 --- a/maint/Unicode.tables/DerivedGeneralCategory.txt +++ b/maint/Unicode.tables/DerivedGeneralCategory.txt @@ -1,10 +1,11 @@ -# DerivedGeneralCategory-8.0.0.txt -# Date: 2015-02-13, 13:47:11 GMT [MD] +# DerivedGeneralCategory-10.0.0.txt +# Date: 2017-03-08, 08:41:49 GMT +# © 2017 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database -# Copyright (c) 1991-2015 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ +# For documentation, see http://www.unicode.org/reports/tr44/ # ================================================ @@ -36,8 +37,10 @@ 082E..082F ; Cn # [2] .. 083F ; Cn # 085C..085D ; Cn # [2] .. -085F..089F ; Cn # [65] .. 
-08B5..08E2 ; Cn # [46] .. +085F ; Cn # +086B..089F ; Cn # [53] .. +08B5 ; Cn # +08BE..08D3 ; Cn # [22] .. 0984 ; Cn # 098D..098E ; Cn # [2] .. 0991..0992 ; Cn # [2] .. @@ -51,7 +54,7 @@ 09D8..09DB ; Cn # [4] .. 09DE ; Cn # 09E4..09E5 ; Cn # [2] .. -09FC..0A00 ; Cn # [5] .. +09FE..0A00 ; Cn # [3] .. 0A04 ; Cn # 0A0B..0A0E ; Cn # [4] .. 0A11..0A12 ; Cn # [2] .. @@ -81,7 +84,7 @@ 0AD1..0ADF ; Cn # [15] .. 0AE4..0AE5 ; Cn # [2] .. 0AF2..0AF8 ; Cn # [7] .. -0AFA..0B00 ; Cn # [7] .. +0B00 ; Cn # 0B04 ; Cn # 0B0D..0B0E ; Cn # [2] .. 0B11..0B12 ; Cn # [2] .. @@ -124,7 +127,6 @@ 0C5B..0C5F ; Cn # [5] .. 0C64..0C65 ; Cn # [2] .. 0C70..0C77 ; Cn # [8] .. -0C80 ; Cn # 0C84 ; Cn # 0C8D ; Cn # 0C91 ; Cn # @@ -138,17 +140,14 @@ 0CDF ; Cn # 0CE4..0CE5 ; Cn # [2] .. 0CF0 ; Cn # -0CF3..0D00 ; Cn # [14] .. +0CF3..0CFF ; Cn # [13] .. 0D04 ; Cn # 0D0D ; Cn # 0D11 ; Cn # -0D3B..0D3C ; Cn # [2] .. 0D45 ; Cn # 0D49 ; Cn # -0D4F..0D56 ; Cn # [8] .. -0D58..0D5E ; Cn # [7] .. +0D50..0D53 ; Cn # [4] .. 0D64..0D65 ; Cn # [2] .. -0D76..0D78 ; Cn # [3] .. 0D80..0D81 ; Cn # [2] .. 0D84 ; Cn # 0D97..0D99 ; Cn # [3] .. @@ -249,11 +248,10 @@ 1BF4..1BFB ; Cn # [8] .. 1C38..1C3A ; Cn # [3] .. 1C4A..1C4C ; Cn # [3] .. -1C80..1CBF ; Cn # [64] .. +1C89..1CBF ; Cn # [55] .. 1CC8..1CCF ; Cn # [8] .. -1CF7 ; Cn # 1CFA..1CFF ; Cn # [6] .. -1DF6..1DFB ; Cn # [6] .. +1DFA ; Cn # 1F16..1F17 ; Cn # [2] .. 1F1E..1F1F ; Cn # [2] .. 1F46..1F47 ; Cn # [2] .. @@ -274,17 +272,16 @@ 2072..2073 ; Cn # [2] .. 208F ; Cn # 209D..209F ; Cn # [3] .. -20BF..20CF ; Cn # [17] .. +20C0..20CF ; Cn # [16] .. 20F1..20FF ; Cn # [15] .. 218C..218F ; Cn # [4] .. -23FB..23FF ; Cn # [5] .. 2427..243F ; Cn # [25] .. 244B..245F ; Cn # [21] .. 2B74..2B75 ; Cn # [2] .. 2B96..2B97 ; Cn # [2] .. 2BBA..2BBC ; Cn # [3] .. 2BC9 ; Cn # -2BD2..2BEB ; Cn # [26] .. +2BD3..2BEB ; Cn # [25] .. 2BF0..2BFF ; Cn # [16] .. 2C2F ; Cn # 2C5F ; Cn # @@ -303,7 +300,7 @@ 2DCF ; Cn # 2DD7 ; Cn # 2DDF ; Cn # -2E43..2E7F ; Cn # [61] .. +2E4A..2E7F ; Cn # [54] .. 
2E9A ; Cn # 2EF4..2EFF ; Cn # [12] .. 2FD6..2FEF ; Cn # [26] .. @@ -311,24 +308,24 @@ 3040 ; Cn # 3097..3098 ; Cn # [2] .. 3100..3104 ; Cn # [5] .. -312E..3130 ; Cn # [3] .. +312F..3130 ; Cn # [2] .. 318F ; Cn # 31BB..31BF ; Cn # [5] .. 31E4..31EF ; Cn # [12] .. 321F ; Cn # 32FF ; Cn # 4DB6..4DBF ; Cn # [10] .. -9FD6..9FFF ; Cn # [42] .. +9FEB..9FFF ; Cn # [21] .. A48D..A48F ; Cn # [3] .. A4C7..A4CF ; Cn # [9] .. A62C..A63F ; Cn # [20] .. A6F8..A6FF ; Cn # [8] .. -A7AE..A7AF ; Cn # [2] .. +A7AF ; Cn # A7B8..A7F6 ; Cn # [63] .. A82C..A82F ; Cn # [4] .. A83A..A83F ; Cn # [6] .. A878..A87F ; Cn # [8] .. -A8C5..A8CD ; Cn # [9] .. +A8C6..A8CD ; Cn # [8] .. A8DA..A8DF ; Cn # [6] .. A8FE..A8FF ; Cn # [2] .. A954..A95E ; Cn # [11] .. @@ -390,21 +387,23 @@ FFFE..FFFF ; Cn # [2] .. 100FB..100FF ; Cn # [5] .. 10103..10106 ; Cn # [4] .. 10134..10136 ; Cn # [3] .. -1018D..1018F ; Cn # [3] ..