From 7375089fa5702da050e45899439792a822c576f0 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel"
PCRE2_ANCHORED Match only at the first position
PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
+ On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject is not the beginning of a line
PCRE2_NOTEOL Subject is not the end of a line
diff --git a/doc/html/pcre2_match.html b/doc/html/pcre2_match.html
index 82c9491..90f7fcc 100644
--- a/doc/html/pcre2_match.html
+++ b/doc/html/pcre2_match.html
@@ -61,7 +61,7 @@ terminated by a binary zero code unit. The options are:
PCRE2_ANCHORED Match only at the first position
PCRE2_COPY_MATCHED_SUBJECT
- On success, make a private subject copy
+ On success, make a private subject copy
PCRE2_ENDANCHORED Pattern can match only at end of subject
PCRE2_NOTBOL Subject string is not the beginning of a line
PCRE2_NOTEOL Subject string is not the end of a line
diff --git a/doc/html/pcre2_match_data_free.html b/doc/html/pcre2_match_data_free.html
index 746c3c1..6ba6162 100644
--- a/doc/html/pcre2_match_data_free.html
+++ b/doc/html/pcre2_match_data_free.html
@@ -31,7 +31,7 @@ using the memory freeing function from the general context or compiled pattern
with which it was created, or free() if that was not set.
-If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this +If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this match data block, the copy of the subject that was remembered with the block is also freed.
diff --git a/doc/html/pcre2_set_compile_extra_options.html b/doc/html/pcre2_set_compile_extra_options.html index 4e342cf..c6c11f7 100644 --- a/doc/html/pcre2_set_compile_extra_options.html +++ b/doc/html/pcre2_set_compile_extra_options.html @@ -31,7 +31,7 @@ housed in a compile context. It completely replaces all the bits. The extra options are:PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes - PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling + PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines diff --git a/doc/html/pcre2api.html b/doc/html/pcre2api.html index 682d9ad..20d92c0 100644 --- a/doc/html/pcre2api.html +++ b/doc/html/pcre2api.html @@ -1309,7 +1309,7 @@ be referenced by the substring extraction functions after a successful match. After running a match, you must not free a compiled pattern or a subject string until after all operations on the match data block -have taken place, unless, in the case of the subject string, you have used the +have taken place, unless, in the case of the subject string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled "Option bits for pcre2_match()" below. @@ -1437,8 +1437,8 @@ binary zero character followed by z). ECMAscript 6 added additional functionality to \u. This can be accessed using the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" below). -Note that this alternative escape handling applies only to patterns. 
Neither of +these options affects the processing of replacement strings passed to pcre2_substitute().PCRE2_ALT_CIRCUMFLEX @@ -1875,10 +1875,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set.PCRE2_EXTRA_ALT_BSUX-The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in -the way that ECMAscript (aka JavaScript) does. Additional functionality was -defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of -PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal +The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal character code, where hhh.. is any number of hexadecimal digits.PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL @@ -1896,7 +1896,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to pcre2_compile(), all unrecognized or malformed escape sequences are treated as single-character escapes. For example, \j is a literal "j" and \x{2z} is treated as the literal string "x{2z}". Setting this option means -that typos in patterns may go undetected and have unexpected results. Also note +that typos in patterns may go undetected and have unexpected results. Also note that a sequence such as [\N{] is interpreted as a malformed attempt at [\N{...}] and so is treated as [N{] whereas [\N] gives an error because an unqualified \N is a valid escape sequence but is not supported in a character @@ -1904,9 +1904,9 @@ class. To reiterate: this is a dangerous option. Use with great care.PCRE2_EXTRA_ESCAPED_CR_IS_LF-There are some legacy applications where the escape sequence \r in a pattern -is expected to match a newline. 
If this option is set, \r in a pattern is -converted to \n so that it matches a LF (linefeed) instead of a CR (carriage +There are some legacy applications where the escape sequence \r in a pattern +is expected to match a newline. If this option is set, \r in a pattern is +converted to \n so that it matches a LF (linefeed) instead of a CR (carriage return) character. The option does not affect a literal CR in the pattern, nor does it affect CR specified as an explicit code point such as \x{0D}.@@ -2564,7 +2564,7 @@ Option bits for pcre2_match()
The unused bits of the options argument for pcre2_match() must be -zero. The only bits that may be set are PCRE2_ANCHORED, +zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below. @@ -2585,8 +2585,8 @@ matching.
PCRE2_COPY_MATCHED_SUBJECT-By default, a pointer to the subject is remembered in the match data block so -that, after a successful match, it can be referenced by the substring +By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring extraction functions. This means that the subject's memory must not be freed until all such operations are complete. For some applications where the lifetime of the subject string is not guaranteed, it may be necessary to make a @@ -2866,8 +2866,8 @@ undefined.After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function -pcre2_get_mark() can be called to access this name, which can be -specified in the pattern by any of the backtracking control verbs, not just +pcre2_get_mark() can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just (*MARK). The same function applies to all the verbs. It returns a pointer to the zero-terminated name, which is within the compiled pattern. If no name is available, NULL is returned. The length of the name (excluding the terminating @@ -3002,7 +3002,7 @@ The backtracking match limit was reached. If a pattern contains many nested backtracking points, heap memory is used to remember them. This error is given when the memory allocation function (default or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given -if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is +if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
PCRE2_ERROR_NULL @@ -3405,7 +3405,7 @@ capture groups and letters within \Q...\E quoted sequences.Note that case forcing sequences such as \U...\E do not nest. For example, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no -effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to replacement strings.
@@ -3439,7 +3439,7 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown groups in the extended syntax forms to be treated as unset.
-If successful, pcre2_substitute() returns the number of successful +If successful, pcre2_substitute() returns the number of successful matches. This may be zero if no matches were found, and is never greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set.
@@ -3489,8 +3489,8 @@ Substitution callouts
The pcre2_set_substitution_callout() function can be used to specify a callout function for pcre2_substitute(). This information is passed in -a match context. The callout function is called after each substitution has -been processed, but it can cause the replacement not to happen. The callout +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. The callout function is not called for simulated substitutions that happen as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. @@ -3500,10 +3500,10 @@ block structure, which contains the following fields, not necessarily in this order:uint32_t version; - uint32_t subscount; + uint32_t subscount; PCRE2_SPTR input; - PCRE2_SPTR output; - PCRE2_SIZE *ovector; + PCRE2_SPTR output; + PCRE2_SIZE *ovector; uint32_t oveccount; PCRE2_SIZE output_offsets[2];@@ -3517,9 +3517,9 @@ first callout, 2 for the second, and so on. The input and output pointers are copies of the values passed to pcre2_substitute().-The ovector field points to the ovector, which contains the result of the -most recent match. The oveccount field contains the number of pairs that -are set in the ovector, and is always greater than zero. +The ovector field points to the ovector, which contains the result of the +most recent match. The oveccount field contains the number of pairs that +are set in the ovector, and is always greater than zero.
The output_offsets vector contains the offsets of the replacement in the diff --git a/doc/html/pcre2build.html b/doc/html/pcre2build.html index a18e269..13d9da2 100644 --- a/doc/html/pcre2build.html +++ b/doc/html/pcre2build.html @@ -376,12 +376,15 @@ environment.
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
-By default, on non-Windows systems, pcre2grep supports the use of -callouts with string arguments within the patterns it is matching, in order to -run external scripts. For details, see the +By default pcre2grep supports the use of callouts with string arguments +within the patterns it is matching. There are two kinds: one that generates +output using local code, and another that calls an external program or script. +If --disable-pcre2grep-callout-fork is added to the configure command, +only the first kind of callout is supported; if --disable-pcre2grep-callout is +used, all callouts are completely ignored. For more details of pcre2grep +callouts, see the pcre2grep -documentation. This support can be disabled by adding ---disable-pcre2grep-callout to the configure command. +documentation.
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
@@ -526,14 +529,14 @@ documentation.
DISABLING THE Z AND T FORMATTING MODIFIERS
-The C99 standard defines formatting modifiers z and t for size_t and -ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in -environments other than Microsoft Visual Studio when __STDC_VERSION__ is +The C99 standard defines formatting modifiers z and t for size_t and +ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in +environments other than Microsoft Visual Studio when __STDC_VERSION__ is defined and has a value greater than or equal to 199901L (indicating C99). However, there is at least one environment that claims to be C99 but does not -support these modifiers. If +support these modifiers. If
- --disable-percent-zt + --disable-percent-ztis specified, no use is made of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for size_t values. @@ -589,9 +592,9 @@ Cambridge, England.
REVISION
-Last updated: 15 November 2018 +Last updated: 03 March 2019
-Copyright © 1997-2018 University of Cambridge. +Copyright © 1997-2019 University of Cambridge.
Return to the PCRE2 index page. diff --git a/doc/html/pcre2callout.html b/doc/html/pcre2callout.html index 899a476..65db933 100644 --- a/doc/html/pcre2callout.html +++ b/doc/html/pcre2callout.html @@ -48,7 +48,7 @@ When using the pcre2_substitute() function, an additional callout feature is available. This does a callout after each change to the subject string and is described in the pcre2api -documentation; the rest of this document is concerned with callouts during +documentation; the rest of this document is concerned with callouts during pattern matching.
diff --git a/doc/html/pcre2grep.html b/doc/html/pcre2grep.html index 634d517..d66cee3 100644 --- a/doc/html/pcre2grep.html +++ b/doc/html/pcre2grep.html @@ -871,8 +871,8 @@ only callouts with string arguments are useful. Calling external programs or scripts
-This facility can be independently disabled when pcre2grep is built. It -is supported for Windows, where a call to _spawnvp() is used, for VMS, +This facility can be independently disabled when pcre2grep is built. It +is supported for Windows, where a call to _spawnvp() is used, for VMS, where lib$spawn() is used, and for any other Unix-like environment where fork() and execv() are available.
diff --git a/doc/html/pcre2pattern.html b/doc/html/pcre2pattern.html index d69e6cb..e6958c1 100644 --- a/doc/html/pcre2pattern.html +++ b/doc/html/pcre2pattern.html @@ -418,13 +418,13 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed by { is not recognized. Only if \x is followed by two hexadecimal digits is it recognized as a character escape. Otherwise it is interpreted as a literal "x" character. In this mode, support for code points greater than 256 is provided -by \u, which must be followed by four hexadecimal digits; otherwise it is +by \u, which must be followed by four hexadecimal digits; otherwise it is interpreted as a literal "u" character.PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, \u{hhh..} is recognized as the character specified by hexadecimal code point. -There may be any number of hexadecimal digits. This syntax is from ECMAScript +There may be any number of hexadecimal digits. This syntax is from ECMAScript 6.
@@ -1194,7 +1194,7 @@ character. If any other of these assertions appears in a character class, an A word boundary is a position in the subject string where the current character and the previous character do not both match \w or \W (i.e. one matches \w and the other matches \W), or the start or end of the string if the -first or last character matches \w, respectively. When PCRE2 is built with +first or last character matches \w, respectively. When PCRE2 is built with Unicode support, the meanings of \w and \W can be changed by setting the PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2 nor Perl has a separate "start of word" or "end of word" metasequence. However, diff --git a/doc/html/pcre2posix.html b/doc/html/pcre2posix.html index b03948e..20a2009 100644 --- a/doc/html/pcre2posix.html +++ b/doc/html/pcre2posix.html @@ -50,13 +50,13 @@ expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit and 32-bit libraries. See the pcre2api documentation for a description of PCRE2's native API, which contains much -additional functionality. +additional functionality.
The functions described here are wrapper functions that ultimately call the PCRE2 native API. Their prototypes are defined in the pcre2posix.h header file, and they all have unique names starting with pcre2_. However, the -pcre2posix.h header also contains macro definitions that convert the +pcre2posix.h header also contains macro definitions that convert the standard POSIX names such as regcomp() into pcre2_regcomp() etc. This means that a program can use the usual POSIX names without running the risk of accidentally linking with POSIX functions from a different library. @@ -68,7 +68,7 @@ application. Because the POSIX functions call the native ones, it is also necessary to add -lpcre2-8.
-Although they are not defined as prototypes in pcre2posix.h, the library +Although they are not defined as prototypes in pcre2posix.h, the library does contain functions with the POSIX names regcomp() etc. These simply pass their arguments to the PCRE2 functions. These functions are provided for backwards compatibility with earlier versions of PCRE2, so that existing diff --git a/doc/html/pcre2syntax.html b/doc/html/pcre2syntax.html index 5022e12..e3dc186 100644 --- a/doc/html/pcre2syntax.html +++ b/doc/html/pcre2syntax.html @@ -58,7 +58,7 @@ documentation. This document contains a quick-reference summary of the syntax.
ESCAPED CHARACTERS
-This table applies to ASCII and Unicode environments. An unrecognized escape +This table applies to ASCII and Unicode environments. An unrecognized escape sequence causes an error.
\a alarm, that is, the BEL character (hex 07) @@ -85,7 +85,7 @@ following are also recognized: When \x is not followed by {, from zero to two hexadecimal digits are read, but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be recognized as a hexadecimal escape; otherwise it matches a literal "x". -Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits +Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it matches a literal "u". diff --git a/doc/html/pcre2test.html b/doc/html/pcre2test.html index 1eb1553..8f35acc 100644 --- a/doc/html/pcre2test.html +++ b/doc/html/pcre2test.html @@ -606,10 +606,10 @@ for a description of the effects of these options. /s dotall set PCRE2_DOTALL dupnames set PCRE2_DUPNAMES endanchored set PCRE2_ENDANCHORED - escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF /x extended set PCRE2_EXTENDED /xx extended_more set PCRE2_EXTENDED_MORE - extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX firstline set PCRE2_FIRSTLINE literal set PCRE2_LITERAL match_line set PCRE2_EXTRA_MATCH_LINE @@ -1043,7 +1043,7 @@ process. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector + allvector show the entire ovector allusedtext show all consulted text altglobal alternative global matching /g global global matching @@ -1051,9 +1051,9 @@ process. 
mark show mark values replace=<string> specify a replacement string startchar show starting character when relevant - substitute_callout use substitution callouts + substitute_callout use substitution callouts substitute_extended use PCRE2_SUBSTITUTE_EXTENDED - substitute_skip=<n> skip substitution number n + substitute_skip=<n> skip substitution number n substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH substitute_stop=<n> skip substitution number n and greater substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET @@ -1191,7 +1191,7 @@ pattern. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector + allvector show the entire ovector allusedtext show all consulted text (non-JIT only) altglobal alternative global matching callout_capture show captures at callout time @@ -1221,9 +1221,9 @@ pattern. replace=<string> specify a replacement string startchar show startchar when relevant startoffset=<n> same as offset=<n> - substitute_callout use substitution callouts + substitute_callout use substitution callouts substitute_extended use PCRE2_SUBSTITUTE_EXTENDED - substitute_skip=<n> skip substitution number n + substitute_skip=<n> skip substitution number n substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH substitute_stop=<n> skip substitution number n and greater substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET @@ -1306,9 +1306,9 @@ result, and also for DFA matching, provides a means of checking that there are no unexpected modifications to ovector fields. Before each match attempt, the ovector is filled with a special value, and if this is found in both elements of a capturing pair, "<unchanged>" is output. After a successful match, this -applies to all groups after the maximum capture group for the pattern. In other -cases it applies to the entire ovector. After a partial match, the first two -elements are the only ones that should be set. 
After a DFA match, the amount of +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of ovector that is used depends on the number of matches that were found.
@@ -1320,7 +1320,7 @@ functions, unless callout_none is specified. Its behaviour can be controlled by various modifiers listed above whose names begin with callout_. Details are given in the section entitled "Callouts" below. -Testing callouts from pcre2_substitute() is described separately in +Testing callouts from pcre2_substitute() is described separately in "Testing the substitution function" below. @@ -1449,14 +1449,14 @@ matching provokes an error return ("bad option value") from Testing substitute callouts
-If the substitute_callout modifier is set, a substitution callout +If the substitute_callout modifier is set, a substitution callout function is set up. When it is called (after each substitution), details of the input and output strings are output. For example:
/abc/g,replace=<$0>,substitute_callout abcdefabcpqr 1(1) Old 0 3 "abc" New 0 5 "<abc>" - 2(1) Old 6 9 "abc" New 8 13 "<abc>" + 2(1) Old 6 9 "abc" New 8 13 "<abc>" 2: <abc>def<abc>pqrThe first number on each callout line is the count of matches. The @@ -1466,11 +1466,11 @@ listed the offsets of the old substring, its contents, and the same for the replacement.-By default, the substitution callout function returns zero, which accepts the -replacement and causes matching to continue if /g was used. Two further -modifiers can be used to test other return values. If substitute_skip is -set to a value greater than zero the callout function returns +1 for the match -of that number, and similarly substitute_stop returns -1. These cause the +By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If substitute_skip is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly substitute_stop returns -1. These cause the replacement to be rejected, and -1 causes no further matching to take place. If either of them are set, substitute_callout is assumed. For example:
@@ -1483,7 +1483,7 @@ either of them are set, substitute_callout is assumed. For example: 1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED" 1: abcdefabcpqr-If both are set for the same number, stop takes precedence. Only a single skip +If both are set for the same number, stop takes precedence. Only a single skip or stop is supported, which is sufficient for testing that the feature works.
diff --git a/doc/html/pcre2unicode.html b/doc/html/pcre2unicode.html index 53a2e11..268119c 100644 --- a/doc/html/pcre2unicode.html +++ b/doc/html/pcre2unicode.html @@ -82,7 +82,7 @@ The escape sequence \C can be used to match a single code unit in a UTF mode, but its use can lead to some strange effects because it breaks up multi-unit characters (see the description of \C in the pcre2pattern -documentation). For this reason, there is a build-time option that disables +documentation). For this reason, there is a build-time option that disables support for \C completely. There is also a less draconian compile-time option for locking out the use of \C when a pattern is compiled. @@ -144,14 +144,14 @@ scripts are commonly used together, and because some diacritical and other marks are used with multiple scripts, it is not that simple.-Every Unicode character has a Script property, mostly with a value +Every Unicode character has a Script property, mostly with a value corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There are also three special values:
"Unknown" is used for code points that have not been assigned, and also for the surrogate code points. In the PCRE2 32-bit library, characters whose code -points are greater than the Unicode maximum (U+10FFFF), which are accessible +points are greater than the Unicode maximum (U+10FFFF), which are accessible only in non-UTF mode, are assigned the Unknown script.
@@ -165,20 +165,20 @@ previous character. These are considered to take on the script of the character that they modify.
-Some Inherited characters are used with many scripts, but many of them are only -normally used with a small number of scripts. For example, U+102E0 (Coptic -Epact thousands mark) is used only with Arabic and Coptic. In order to make it -possible to check this, a Unicode property called Script Extension exists. Its -value is a list of scripts that apply to the character. For the majority of +Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of characters, the list contains just one script, the same one as the Script property. However, for characters such as U+102E0 more than one Script is listed. There are also some Common characters that have a single, non-Common script in their Script Extension list.
-The next section describes the basic rules for deciding whether a given string -of characters is a script run. Note, however, that there are some special cases -involving the Chinese Han script, and an additional constraint for decimal +The next section describes the basic rules for deciding whether a given string +of characters is a script run. Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal digits. These are covered in subsequent sections.
@@ -201,17 +201,17 @@ all the sets of scripts must not be empty.A simple example is an Internet name such as "google.com". The letters are all in the Latin script, and the dot is Common, so this string is a script run. -However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a string that looks the same, but with Cyrillic "o"s is not a script run.
-More interesting examples involve characters with more than one script in their +More interesting examples involve characters with more than one script in their Script Extension. Consider the following characters:
U+060C Arabic comma U+06D4 Arabic full stop-The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could appear in script runs of either Arabic or Hanifi Rohingya. The first could also appear in Syriac or Thaana script runs, but the second could not. @@ -220,8 +220,8 @@ appear in Syriac or Thaana script runs, but the second could not. The Chinese Han script
-The Chinese Han script is commonly used in conjunction with other scripts for -writing certain languages. Japanese uses the Hiragana and Katakana scripts +The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. Japanese uses the Hiragana and Katakana scripts together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo and Han. These three combinations are treated as special cases when checking script runs and are, in effect, "virtual scripts". Thus, a script run may diff --git a/doc/pcre2.txt b/doc/pcre2.txt index 1ccaf90..a2a9e7f 100644 --- a/doc/pcre2.txt +++ b/doc/pcre2.txt @@ -180,8 +180,8 @@ REVISION Last updated: 17 September 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2API(3) Library Functions Manual PCRE2API(3) @@ -3681,8 +3681,8 @@ REVISION Last updated: 14 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) @@ -4027,45 +4027,48 @@ USING EBCDIC CODE PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS - By default, on non-Windows systems, pcre2grep supports the use of call- - outs with string arguments within the patterns it is matching, in order - to run external scripts. For details, see the pcre2grep documentation. - This support can be disabled by adding --disable-pcre2grep-callout to - the configure command. + By default pcre2grep supports the use of callouts with string arguments + within the patterns it is matching. There are two kinds: one that gen- + erates output using local code, and another that calls an external pro- + gram or script. If --disable-pcre2grep-callout-fork is added to the + configure command, only the first kind of callout is supported; if + --disable-pcre2grep-callout is used, all callouts are completely + ignored. 
For more details of pcre2grep callouts, see the pcre2grep doc- + umentation. PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT - By default, pcre2grep reads all files as plain text. You can build it - so that it recognizes files whose names end in .gz or .bz2, and reads + By default, pcre2grep reads all files as plain text. You can build it + so that it recognizes files whose names end in .gz or .bz2, and reads them with libz or libbz2, respectively, by adding one or both of --enable-pcre2grep-libz --enable-pcre2grep-libbz2 to the configure command. These options naturally require that the rel- - evant libraries are installed on your system. Configuration will fail + evant libraries are installed on your system. Configuration will fail if they are not. PCRE2GREP BUFFER SIZE - pcre2grep uses an internal buffer to hold a "window" on the file it is + pcre2grep uses an internal buffer to hold a "window" on the file it is scanning, in order to be able to output "before" and "after" lines when it finds a match. The default starting size of the buffer is 20KiB. The - buffer itself is three times this size, but because of the way it is + buffer itself is three times this size, but because of the way it is used for holding "before" lines, the longest line that is guaranteed to be processable is the notional buffer size. If a longer line is encoun- - tered, pcre2grep automatically expands the buffer, up to a specified - maximum size, whose default is 1MiB or the starting size, whichever is - the larger. You can change the default parameter values by adding, for + tered, pcre2grep automatically expands the buffer, up to a specified + maximum size, whose default is 1MiB or the starting size, whichever is + the larger. You can change the default parameter values by adding, for example, --with-pcre2grep-bufsize=51200 --with-pcre2grep-max-bufsize=2097152 - to the configure command. 
The caller of pcre2grep can override these - values by using --buffer-size and --max-buffer-size on the command + to the configure command. The caller of pcre2grep can override these + values by using --buffer-size and --max-buffer-size on the command line. @@ -4076,26 +4079,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT --enable-pcre2test-libreadline --enable-pcre2test-libedit - to the configure command, pcre2test is linked with the libreadline + to the configure command, pcre2test is linked with the libreadline or libedit library, respectively, and when its input is from a terminal, - it reads it using the readline() function. This provides line-editing - and history facilities. Note that libreadline is GPL-licensed, so if - you distribute a binary of pcre2test linked in this way, there may be + it reads it using the readline() function. This provides line-editing + and history facilities. Note that libreadline is GPL-licensed, so if + you distribute a binary of pcre2test linked in this way, there may be licensing issues. These can be avoided by linking instead with libedit, which has a BSD licence. - Setting --enable-pcre2test-libreadline causes the -lreadline option to - be added to the pcre2test build. In many operating environments with a - system-installed readline library this is sufficient. However, in some + Setting --enable-pcre2test-libreadline causes the -lreadline option to + be added to the pcre2test build. In many operating environments with a + system-installed readline library this is sufficient. However, in some environments (e.g. if an unmodified distribution version of readline is - in use), some extra configuration may be necessary. The INSTALL file + in use), some extra configuration may be necessary. The INSTALL file for libreadline says this: "Readline uses the termcap functions, but does not link with the termcap or curses library itself, allowing applications which link with readline the to choose an appropriate library." 
- If your environment has not been set up so that an appropriate library + If your environment has not been set up so that an appropriate library is automatically included, you may need to add something like LIBS="-ncurses" @@ -4109,7 +4112,7 @@ INCLUDING DEBUGGING CODE --enable-debug - to the configure command, additional debugging code is included in the + to the configure command, additional debugging code is included in the build. This feature is intended for use by the PCRE2 maintainers. @@ -4119,15 +4122,15 @@ DEBUGGING WITH VALGRIND SUPPORT --enable-valgrind - to the configure command, PCRE2 will use valgrind annotations to mark - certain memory regions as unaddressable. This allows it to detect - invalid memory accesses, and is mostly useful for debugging PCRE2 + to the configure command, PCRE2 will use valgrind annotations to mark + certain memory regions as unaddressable. This allows it to detect + invalid memory accesses, and is mostly useful for debugging PCRE2 itself. CODE COVERAGE REPORTING - If your C compiler is gcc, you can build a version of PCRE2 that can + If your C compiler is gcc, you can build a version of PCRE2 that can generate a code coverage report for its test suite. To enable this, you must install lcov version 1.6 or above. Then specify @@ -4136,20 +4139,20 @@ CODE COVERAGE REPORTING to the configure command and build PCRE2 in the usual way. Note that using ccache (a caching C compiler) is incompatible with code - coverage reporting. If you have configured ccache to run automatically + coverage reporting. If you have configured ccache to run automatically on your system, you must set the environment variable CCACHE_DISABLE=1 before running make to build PCRE2, so that ccache is not used. - When --enable-coverage is used, the following addition targets are + When --enable-coverage is used, the following addition targets are added to the Makefile: make coverage - This creates a fresh coverage report for the PCRE2 test suite. 
It is - equivalent to running "make coverage-reset", "make coverage-baseline", + This creates a fresh coverage report for the PCRE2 test suite. It is + equivalent to running "make coverage-reset", "make coverage-baseline", "make check", and then "make coverage-report". make coverage-reset @@ -4166,28 +4169,28 @@ CODE COVERAGE REPORTING make coverage-clean-report - This removes the generated coverage report without cleaning the cover- + This removes the generated coverage report without cleaning the cover- age data itself. make coverage-clean-data - This removes the captured coverage data without removing the coverage + This removes the captured coverage data without removing the coverage files created at compile time (*.gcno). make coverage-clean - This cleans all coverage data including the generated coverage report. - For more information about code coverage, see the gcov and lcov docu- + This cleans all coverage data including the generated coverage report. + For more information about code coverage, see the gcov and lcov docu- mentation. DISABLING THE Z AND T FORMATTING MODIFIERS - The C99 standard defines formatting modifiers z and t for size_t and - ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers - in environments other than Microsoft Visual Studio when __STDC_VER- - SION__ is defined and has a value greater than or equal to 199901L - (indicating C99). However, there is at least one environment that + The C99 standard defines formatting modifiers z and t for size_t and + ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers + in environments other than Microsoft Visual Studio when __STDC_VER- + SION__ is defined and has a value greater than or equal to 199901L + (indicating C99). However, there is at least one environment that claims to be C99 but does not support these modifiers. 
If --disable-percent-zt @@ -4198,39 +4201,39 @@ DISABLING THE Z AND T FORMATTING MODIFIERS SUPPORT FOR FUZZERS - There is a special option for use by people who want to run fuzzing + There is a special option for use by people who want to run fuzzing tests on PCRE2: --enable-fuzz-support At present this applies only to the 8-bit library. If set, it causes an - extra library called libpcre2-fuzzsupport.a to be built, but not - installed. This contains a single function called LLVMFuzzerTestOneIn- - put() whose arguments are a pointer to a string and the length of the - string. When called, this function tries to compile the string as a - pattern, and if that succeeds, to match it. This is done both with no - options and with some random options bits that are generated from the + extra library called libpcre2-fuzzsupport.a to be built, but not + installed. This contains a single function called LLVMFuzzerTestOneIn- + put() whose arguments are a pointer to a string and the length of the + string. When called, this function tries to compile the string as a + pattern, and if that succeeds, to match it. This is done both with no + options and with some random options bits that are generated from the string. - Setting --enable-fuzz-support also causes a binary called pcre2fuz- - zcheck to be created. This is normally run under valgrind or used when + Setting --enable-fuzz-support also causes a binary called pcre2fuz- + zcheck to be created. This is normally run under valgrind or used when PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing - function and outputs information about what it is doing. The input - strings are specified by arguments: if an argument starts with "=" the - rest of it is a literal input string. Otherwise, it is assumed to be a + function and outputs information about what it is doing. The input + strings are specified by arguments: if an argument starts with "=" the + rest of it is a literal input string. 
Otherwise, it is assumed to be a file name, and the contents of the file are the test string. OBSOLETE OPTION - In versions of PCRE2 prior to 10.30, there were two ways of handling - backtracking in the pcre2_match() function. The default was to use the + In versions of PCRE2 prior to 10.30, there were two ways of handling + backtracking in the pcre2_match() function. The default was to use the system stack, but if --disable-stack-for-recursion - was set, memory on the heap was used. From release 10.30 onwards this - has changed (the stack is no longer used) and this option now does + was set, memory on the heap was used. From release 10.30 onwards this + has changed (the stack is no longer used) and this option now does nothing except give a warning. @@ -4248,11 +4251,11 @@ AUTHOR REVISION - Last updated: 15 November 2018 - Copyright (c) 1997-2018 University of Cambridge. + Last updated: 03 March 2019 + Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) @@ -4682,8 +4685,8 @@ REVISION Last updated: 03 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) @@ -4887,8 +4890,8 @@ REVISION Last updated: 12 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) @@ -5287,8 +5290,8 @@ REVISION Last updated: 16 October 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) @@ -5357,8 +5360,8 @@ REVISION Last updated: 02 February 2019 Copyright (c) 1997-2019 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) @@ -5578,8 +5581,8 @@ REVISION Last updated: 10 October 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) @@ -6018,8 +6021,8 @@ REVISION Last updated: 22 December 2014 Copyright (c) 1997-2014 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3) @@ -9362,8 +9365,8 @@ REVISION Last updated: 12 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3) @@ -9597,8 +9600,8 @@ REVISION Last updated: 03 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3) @@ -9927,8 +9930,8 @@ REVISION Last updated: 30 January 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3) @@ -10206,8 +10209,8 @@ REVISION Last updated: 27 June 2018 Copyright (c) 1997-2018 University of Cambridge. ------------------------------------------------------------------------------ - - + + PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3) @@ -10707,8 +10710,8 @@ REVISION Last updated: 11 February 2019 Copyright (c) 1997-2019 University of Cambridge. 
------------------------------------------------------------------------------ - - + + PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) @@ -11079,5 +11082,5 @@ REVISION Last updated: 03 February 2019 Copyright (c) 1997-2019 University of Cambridge. ------------------------------------------------------------------------------ - - + + diff --git a/doc/pcre2_compile.3 b/doc/pcre2_compile.3 index c212355..b23bf46 100644 --- a/doc/pcre2_compile.3 +++ b/doc/pcre2_compile.3 @@ -75,7 +75,7 @@ PCRE2_UTF, PCRE2_UCP and related options. .P Additional options may be set in the compile context via the .\" HREF -\fBpcre2_set_compile_extra_options\fP +\fBpcre2_set_compile_extra_options\fP .\" function. .P diff --git a/doc/pcre2_dfa_match.3 b/doc/pcre2_dfa_match.3 index 834158c..6413cb6 100644 --- a/doc/pcre2_dfa_match.3 +++ b/doc/pcre2_dfa_match.3 @@ -40,7 +40,7 @@ characters. The options are: .sp PCRE2_ANCHORED Match only at the first position PCRE2_COPY_MATCHED_SUBJECT - On success, make a private subject copy + On success, make a private subject copy PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_NOTBOL Subject is not the beginning of a line PCRE2_NOTEOL Subject is not the end of a line diff --git a/doc/pcre2_match.3 b/doc/pcre2_match.3 index 10a1a0f..2be2dd0 100644 --- a/doc/pcre2_match.3 +++ b/doc/pcre2_match.3 @@ -49,7 +49,7 @@ terminated by a binary zero code unit. 
The options are: .sp PCRE2_ANCHORED Match only at the first position PCRE2_COPY_MATCHED_SUBJECT - On success, make a private subject copy + On success, make a private subject copy PCRE2_ENDANCHORED Pattern can match only at end of subject PCRE2_NOTBOL Subject string is not the beginning of a line PCRE2_NOTEOL Subject string is not the end of a line diff --git a/doc/pcre2_match_data_free.3 b/doc/pcre2_match_data_free.3 index 5b920e4..cebdef9 100644 --- a/doc/pcre2_match_data_free.3 +++ b/doc/pcre2_match_data_free.3 @@ -18,7 +18,7 @@ If \fImatch_data\fP is NULL, this function does nothing. Otherwise, using the memory freeing function from the general context or compiled pattern with which it was created, or \fBfree()\fP if that was not set. .P -If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this +If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this match data block, the copy of the subject that was remembered with the block is also freed. .P diff --git a/doc/pcre2_set_compile_extra_options.3 b/doc/pcre2_set_compile_extra_options.3 index 26d1e33..764a75e 100644 --- a/doc/pcre2_set_compile_extra_options.3 +++ b/doc/pcre2_set_compile_extra_options.3 @@ -23,7 +23,7 @@ options are: in UTF-8 and UTF-32 modes .\" JOIN PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and \ex - handling + handling .\" JOIN PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index ca73237..d219466 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -247,7 +247,7 @@ document for an overview of all the PCRE2 documentation. 
.sp .B const unsigned char *pcre2_maketables(pcre2_general_context *\fIgcontext\fP); .sp -.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, +.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, .B " void *\fIwhere\fP);" .sp .B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP, @@ -1244,7 +1244,7 @@ until after all operations on the .\" match data block .\" -have taken place, unless, in the case of the subject string, you have used the +have taken place, unless, in the case of the subject string, you have used the PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled "Option bits for \fBpcre2_match()\fP" .\" HTML @@ -1375,8 +1375,8 @@ the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options" .\" below). .\" -Note that this alternative escape handling applies only to patterns. Neither of -these options affects the processing of replacement strings passed to +Note that this alternative escape handling applies only to patterns. Neither of +these options affects the processing of replacement strings passed to \fBpcre2_substitute()\fP. .sp PCRE2_ALT_CIRCUMFLEX @@ -1832,10 +1832,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set. .sp PCRE2_EXTRA_ALT_BSUX .sp -The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in -the way that ECMAscript (aka JavaScript) does. Additional functionality was -defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of -PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal +The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in +the way that ECMAscript (aka JavaScript) does. Additional functionality was +defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of +PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal character code, where hhh.. is any number of hexadecimal digits. 
.sp PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL @@ -1852,7 +1852,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to \fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are treated as single-character escapes. For example, \ej is a literal "j" and \ex{2z} is treated as the literal string "x{2z}". Setting this option means -that typos in patterns may go undetected and have unexpected results. Also note +that typos in patterns may go undetected and have unexpected results. Also note that a sequence such as [\eN{] is interpreted as a malformed attempt at [\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an unqualified \eN is a valid escape sequence but is not supported in a character @@ -1860,9 +1860,9 @@ class. To reiterate: this is a dangerous option. Use with great care. .sp PCRE2_EXTRA_ESCAPED_CR_IS_LF .sp -There are some legacy applications where the escape sequence \er in a pattern -is expected to match a newline. If this option is set, \er in a pattern is -converted to \en so that it matches a LF (linefeed) instead of a CR (carriage +There are some legacy applications where the escape sequence \er in a pattern +is expected to match a newline. If this option is set, \er in a pattern is +converted to \en so that it matches a LF (linefeed) instead of a CR (carriage return) character. The option does not affect a literal CR in the pattern, nor does it affect CR specified as an explicit code point such as \ex{0D}. .sp @@ -2547,7 +2547,7 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA. .rs .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be -zero. The only bits that may be set are PCRE2_ANCHORED, +zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. 
Their action is described below. @@ -2567,8 +2567,8 @@ matching. .sp PCRE2_COPY_MATCHED_SUBJECT .sp -By default, a pointer to the subject is remembered in the match data block so -that, after a successful match, it can be referenced by the substring +By default, a pointer to the subject is remembered in the match data block so +that, after a successful match, it can be referenced by the substring extraction functions. This means that the subject's memory must not be freed until all such operations are complete. For some applications where the lifetime of the subject string is not guaranteed, it may be necessary to make a @@ -2868,8 +2868,8 @@ undefined. .P After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function -\fBpcre2_get_mark()\fP can be called to access this name, which can be -specified in the pattern by any of the backtracking control verbs, not just +\fBpcre2_get_mark()\fP can be called to access this name, which can be +specified in the pattern by any of the backtracking control verbs, not just (*MARK). The same function applies to all the verbs. It returns a pointer to the zero-terminated name, which is within the compiled pattern. If no name is available, NULL is returned. The length of the name (excluding the terminating @@ -3016,7 +3016,7 @@ The backtracking match limit was reached. If a pattern contains many nested backtracking points, heap memory is used to remember them. This error is given when the memory allocation function (default or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given -if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is +if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails. .sp PCRE2_ERROR_NULL @@ -3407,7 +3407,7 @@ capture groups and letters within \eQ...\eE quoted sequences. 
.P Note that case forcing sequences such as \eU...\eE do not nest. For example, the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no -effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do +effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do not apply to replacement strings. .P The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more @@ -3439,7 +3439,7 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown groups in the extended syntax forms to be treated as unset. .P -If successful, \fBpcre2_substitute()\fP returns the number of successful +If successful, \fBpcre2_substitute()\fP returns the number of successful matches. This may be zero if no matches were found, and is never greater than 1 unless PCRE2_SUBSTITUTE_GLOBAL is set. .P @@ -3487,8 +3487,8 @@ above). .sp The \fBpcre2_set_substitution_callout()\fP function can be used to specify a callout function for \fBpcre2_substitute()\fP. This information is passed in -a match context. The callout function is called after each substitution has -been processed, but it can cause the replacement not to happen. The callout +a match context. The callout function is called after each substitution has +been processed, but it can cause the replacement not to happen. The callout function is not called for simulated substitutions that happen as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. 
.P @@ -3497,10 +3497,10 @@ block structure, which contains the following fields, not necessarily in this order: .sp uint32_t \fIversion\fP; - uint32_t \fIsubscount\fP; + uint32_t \fIsubscount\fP; PCRE2_SPTR \fIinput\fP; - PCRE2_SPTR \fIoutput\fP; - PCRE2_SIZE \fI*ovector\fP; + PCRE2_SPTR \fIoutput\fP; + PCRE2_SIZE \fI*ovector\fP; uint32_t \fIoveccount\fP; PCRE2_SIZE \fIoutput_offsets[2]\fP; .sp @@ -3512,9 +3512,9 @@ The \fIsubscount\fP field is the number of the current match. It is 1 for the first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP pointers are copies of the values passed to \fBpcre2_substitute()\fP. .P -The \fIovector\fP field points to the ovector, which contains the result of the -most recent match. The \fIoveccount\fP field contains the number of pairs that -are set in the ovector, and is always greater than zero. +The \fIovector\fP field points to the ovector, which contains the result of the +most recent match. The \fIoveccount\fP field contains the number of pairs that +are set in the ovector, and is always greater than zero. .P The \fIoutput_offsets\fP vector contains the offsets of the replacement in the output string. This has already been processed for dollar and (if requested) diff --git a/doc/pcre2callout.3 b/doc/pcre2callout.3 index 7a9c0d8..adb411b 100644 --- a/doc/pcre2callout.3 +++ b/doc/pcre2callout.3 @@ -33,7 +33,7 @@ is described in the .\" HREF \fBpcre2api\fP .\" -documentation; the rest of this document is concerned with callouts during +documentation; the rest of this document is concerned with callouts during pattern matching. .P Within a regular expression, (?C
) indicates a point at which the external diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1 index 9a8b0db..6b3219b 100644 --- a/doc/pcre2grep.1 +++ b/doc/pcre2grep.1 @@ -778,8 +778,8 @@ only callouts with string arguments are useful. .SS "Calling external programs or scripts" .rs .sp -This facility can be independently disabled when \fBpcre2grep\fP is built. It -is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS, +This facility can be independently disabled when \fBpcre2grep\fP is built. It +is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS, where \fBlib$spawn()\fP is used, and for any other Unix-like environment where \fBfork()\fP and \fBexecv()\fP are available. .P diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index 0576f0b..de8d7ce 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -390,12 +390,12 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed by { is not recognized. Only if \ex is followed by two hexadecimal digits is it recognized as a character escape. Otherwise it is interpreted as a literal "x" character. In this mode, support for code points greater than 256 is provided -by \eu, which must be followed by four hexadecimal digits; otherwise it is +by \eu, which must be followed by four hexadecimal digits; otherwise it is interpreted as a literal "u" character. .P PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition, \eu{hhh..} is recognized as the character specified by hexadecimal code point. -There may be any number of hexadecimal digits. This syntax is from ECMAScript +There may be any number of hexadecimal digits. This syntax is from ECMAScript 6. .P The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option @@ -1188,7 +1188,7 @@ character. 
If any other of these assertions appears in a character class, an A word boundary is a position in the subject string where the current character and the previous character do not both match \ew or \eW (i.e. one matches \ew and the other matches \eW), or the start or end of the string if the -first or last character matches \ew, respectively. When PCRE2 is built with +first or last character matches \ew, respectively. When PCRE2 is built with Unicode support, the meanings of \ew and \eW can be changed by setting the PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2 nor Perl has a separate "start of word" or "end of word" metasequence. However, diff --git a/doc/pcre2posix.3 b/doc/pcre2posix.3 index 05eb605..35e68e2 100644 --- a/doc/pcre2posix.3 +++ b/doc/pcre2posix.3 @@ -29,12 +29,12 @@ and 32-bit libraries. See the \fBpcre2api\fP .\" documentation for a description of PCRE2's native API, which contains much -additional functionality. +additional functionality. .P The functions described here are wrapper functions that ultimately call the PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header file, and they all have unique names starting with \fBpcre2_\fP. However, the -\fBpcre2posix.h\fP header also contains macro definitions that convert the +\fBpcre2posix.h\fP header also contains macro definitions that convert the standard POSIX names such \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This means that a program can use the usual POSIX names without running the risk of accidentally linking with POSIX functions from a different library. @@ -44,7 +44,7 @@ can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an application. Because the POSIX functions call the native ones, it is also necessary to add \fB-lpcre2-8\fP. 
.P -Although they are not defined as protypes in \fBpcre2posix.h\fP, the library +Although they are not defined as protypes in \fBpcre2posix.h\fP, the library does contain functions with the POSIX names \fBregcomp()\fP etc. These simply pass their arguments to the PCRE2 functions. These functions are provided for backwards compatibility with earlier versions of PCRE2, so that existing diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3 index b6dd33c..70538e4 100644 --- a/doc/pcre2syntax.3 +++ b/doc/pcre2syntax.3 @@ -22,7 +22,7 @@ documentation. This document contains a quick-reference summary of the syntax. .SH "ESCAPED CHARACTERS" .rs .sp -This table applies to ASCII and Unicode environments. An unrecognized escape +This table applies to ASCII and Unicode environments. An unrecognized escape sequence causes an error. .sp \ea alarm, that is, the BEL character (hex 07) @@ -49,7 +49,7 @@ following are also recognized: When \ex is not followed by {, from zero to two hexadecimal digits are read, but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be recognized as a hexadecimal escape; otherwise it matches a literal "x". -Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits +Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it matches a literal "u". .P diff --git a/doc/pcre2test.1 b/doc/pcre2test.1 index b4b8eca..954e043 100644 --- a/doc/pcre2test.1 +++ b/doc/pcre2test.1 @@ -565,10 +565,10 @@ for a description of the effects of these options. 
/s dotall set PCRE2_DOTALL dupnames set PCRE2_DUPNAMES endanchored set PCRE2_ENDANCHORED - escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF + escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF /x extended set PCRE2_EXTENDED /xx extended_more set PCRE2_EXTENDED_MORE - extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX + extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX firstline set PCRE2_FIRSTLINE literal set PCRE2_LITERAL match_line set PCRE2_EXTRA_MATCH_LINE @@ -1005,7 +1005,7 @@ process. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector + allvector show the entire ovector allusedtext show all consulted text altglobal alternative global matching /g global global matching @@ -1013,9 +1013,9 @@ process. mark show mark values replace= specify a replacement string startchar show starting character when relevant - substitute_callout use substitution callouts + substitute_callout use substitution callouts substitute_extended use PCRE2_SUBSTITUTE_EXTENDED - substitute_skip= skip substitution number n + substitute_skip= skip substitution number n substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH substitute_stop= skip substitution number n and greater substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET @@ -1160,7 +1160,7 @@ pattern. aftertext show text after match allaftertext show text after captures allcaptures show all captures - allvector show the entire ovector + allvector show the entire ovector allusedtext show all consulted text (non-JIT only) altglobal alternative global matching callout_capture show captures at callout time @@ -1190,9 +1190,9 @@ pattern. 
replace= specify a replacement string startchar show startchar when relevant startoffset= same as offset= - substitute_callout use substitution callouts + substitute_callout use substitution callouts substitute_extedded use PCRE2_SUBSTITUTE_EXTENDED - substitute_skip= skip substitution number n + substitute_skip= skip substitution number n substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH substitute_stop= skip substitution number n and greater substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET @@ -1273,9 +1273,9 @@ result, and also for DFA matching, provides a means of checking that there are no unexpected modifications to ovector fields. Before each match attempt, the ovector is filled with a special value, and if this is found in both elements of a capturing pair, " " is output. After a successful match, this -applies to all groups after the maximum capture group for the pattern. In other -cases it applies to the entire ovector. After a partial match, the first two -elements are the only ones that should be set. After a DFA match, the amount of +applies to all groups after the maximum capture group for the pattern. In other +cases it applies to the entire ovector. After a partial match, the first two +elements are the only ones that should be set. After a DFA match, the amount of ovector that is used depends on the number of matches that were found. . . @@ -1288,13 +1288,13 @@ controlled by various modifiers listed above whose names begin with \fBcallout_\fP. Details are given in the section entitled "Callouts" .\" HTML .\" -below. +below. .\" -Testing callouts from \fBpcre2_substitute()\fP is decribed separately in +Testing callouts from \fBpcre2_substitute()\fP is decribed separately in "Testing the substitution function" .\" HTML .\" -below. +below. .\" . . 
@@ -1416,14 +1416,14 @@ matching provokes an error return ("bad option value") from .SS "Testing substitute callouts" .rs .sp -If the \fBsubstitute_callout\fP modifier is set, a substitution callout +If the \fBsubstitute_callout\fP modifier is set, a substitution callout function is set up. When it is called (after each substitution), details of the the input and output strings are output. For example: .sp /abc/g,replace=<$0>,substitute_callout abcdefabcpqr 1(1) Old 0 3 "abc" New 0 5 " " - 2(1) Old 6 9 "abc" New 8 13 " " + 2(1) Old 6 9 "abc" New 8 13 " " 2: def pqr .sp The first number on each callout line is the count of matches. The @@ -1432,11 +1432,11 @@ is, one more than the number of capturing groups that were set). Then are listed the offsets of the old substring, its contents, and the same for the replacement. .P -By default, the substitution callout function returns zero, which accepts the -replacement and causes matching to continue if /g was used. Two further -modifiers can be used to test other return values. If \fBsubstitute_skip\fP is -set to a value greater than zero the callout function returns +1 for the match -of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the +By default, the substitution callout function returns zero, which accepts the +replacement and causes matching to continue if /g was used. Two further +modifiers can be used to test other return values. If \fBsubstitute_skip\fP is +set to a value greater than zero the callout function returns +1 for the match +of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the replacement to be rejected, and -1 causes no further matching to take place. If either of them are set, \fBsubstitute_callout\fP is assumed. For example: .sp @@ -1449,7 +1449,7 @@ either of them are set, \fBsubstitute_callout\fP is assumed. For example: 1(1) Old 0 3 "abc" New 0 5 " STOPPED" 1: abcdefabcpqr .sp -If both are set for the same number, stop takes precedence. 
Only a single skip +If both are set for the same number, stop takes precedence. Only a single skip or stop is supported, which is sufficient for testing that the feature works. . . diff --git a/doc/pcre2unicode.3 b/doc/pcre2unicode.3 index a34a400..fc594aa 100644 --- a/doc/pcre2unicode.3 +++ b/doc/pcre2unicode.3 @@ -72,7 +72,7 @@ characters (see the description of \eC in the .\" HREF \fBpcre2pattern\fP .\" -documentation). For this reason, there is a build-time option that disables +documentation). For this reason, there is a build-time option that disables support for \eC completely. There is also a less draconian compile-time option for locking out the use of \eC when a pattern is compiled. .P @@ -135,13 +135,13 @@ characters that are all from the same Unicode script. However, because some scripts are commonly used together, and because some diacritical and other marks are used with multiple scripts, it is not that simple. .P -Every Unicode character has a Script property, mostly with a value +Every Unicode character has a Script property, mostly with a value corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There are also three special values: .P "Unknown" is used for code points that have not been assigned, and also for the surrogate code points. In the PCRE2 32-bit library, characters whose code -points are greater than the Unicode maximum (U+10FFFF), which are accessible +points are greater than the Unicode maximum (U+10FFFF), which are accessible only in non-UTF mode, are assigned the Unknown script. .P "Common" is used for characters that are used with many scripts. These include @@ -152,19 +152,19 @@ digits 0 to 9. previous character. These are considered to take on the script of the character that they modify. .P -Some Inherited characters are used with many scripts, but many of them are only -normally used with a small number of scripts. For example, U+102E0 (Coptic -Epact thousands mark) is used only with Arabic and Coptic. 
In order to make it -possible to check this, a Unicode property called Script Extension exists. Its -value is a list of scripts that apply to the character. For the majority of +Some Inherited characters are used with many scripts, but many of them are only +normally used with a small number of scripts. For example, U+102E0 (Coptic +Epact thousands mark) is used only with Arabic and Coptic. In order to make it +possible to check this, a Unicode property called Script Extension exists. Its +value is a list of scripts that apply to the character. For the majority of characters, the list contains just one script, the same one as the Script property. However, for characters such as U+102E0 more than one Script is listed. There are also some Common characters that have a single, non-Common script in their Script Extension list. .P -The next section describes the basic rules for deciding whether a given string -of characters is a script run. Note, however, that there are some special cases -involving the Chinese Han script, and an additional constraint for decimal +The next section describes the basic rules for deciding whether a given string +of characters is a script run. Note, however, that there are some special cases +involving the Chinese Han script, and an additional constraint for decimal digits. These are covered in subsequent sections. . . @@ -185,16 +185,16 @@ all the sets of scripts must not be empty. .P A simple example is an Internet name such as "google.com". The letters are all in the Latin script, and the dot is Common, so this string is a script run. -However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a +However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a string that looks the same, but with Cyrillic "o"s is not a script run. .P -More interesting examples involve characters with more than one script in their +More interesting examples involve characters with more than one script in their Script Extension. 
Consider the following characters: .sp U+060C Arabic comma U+06D4 Arabic full stop .sp -The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and +The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could appear in script runs of either Arabic or Hanifi Rohingya. The first could also appear in Syriac or Thaana script runs, but the second could not. @@ -202,9 +202,9 @@ appear in Syriac or Thaana script runs, but the second could not. . .SS "The Chinese Han script" .rs -.sp -The Chinese Han script is commonly used in conjunction with other scripts for -writing certain languages. Japanese uses the Hiragana and Katakana scripts +.sp +The Chinese Han script is commonly used in conjunction with other scripts for +writing certain languages. Japanese uses the Hiragana and Katakana scripts together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo and Han. These three combinations are treated as special cases when checking script runs and are, in effect, "virtual scripts". Thus, a script run may diff --git a/perltest.sh b/perltest.sh index 4806f8d..406a14e 100755 --- a/perltest.sh +++ b/perltest.sh @@ -29,7 +29,7 @@ if [ $# -gt 1 -a "$1" = "-perl" ] ; then shift perl=$1 shift -fi +fi if [ $# -gt 0 -a "$1" = "-w" ] ; then perlarg="-w" @@ -386,10 +386,10 @@ for (;;) } } -# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode +# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode # "main::OUTFILE" used only once". -close(OUTFILE) if $outfile eq "OUTFILE"; +close(OUTFILE) if $outfile eq "OUTFILE"; PERLEND ) | $perl $perlarg - $@ diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index 7e01fbd..201e314 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE. 
#define PCRE2_MAJOR 10 #define PCRE2_MINOR 33 #define PCRE2_PRERELEASE -RC1 -#define PCRE2_DATE 2018-09-14 +#define PCRE2_DATE 2019-03-03 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate @@ -150,6 +150,7 @@ D is inspected during pcre2_dfa_match() execution #define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */ #define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */ #define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */ +#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */ /* These are for pcre2_jit_compile(). */ diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index 5a19f53..5814af7 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -604,15 +604,15 @@ for(;;) case OP_SCBRAPOS: if (cb->had_recurse) return FALSE; break; - + /* A script run might have to backtrack if the iterated item can match - characters from more than one script. So give up unless repeating an + characters from more than one script. So give up unless repeating an explicit character. */ - + case OP_SCRIPT_RUN: if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI) - return FALSE; - break; + return FALSE; + break; /* Atomic sub-patterns and assertions can always auto-possessify their last iterator. 
However, if the group was entered as a result of checking diff --git a/src/pcre2_context.c b/src/pcre2_context.c index 55225de..9c2886a 100644 --- a/src/pcre2_context.c +++ b/src/pcre2_context.c @@ -407,7 +407,7 @@ return 0; PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_set_substitute_callout(pcre2_match_context *mcontext, - int (*substitute_callout)(pcre2_substitute_callout_block *, void *), + int (*substitute_callout)(pcre2_substitute_callout_block *, void *), void *substitute_callout_data) { mcontext->substitute_callout = substitute_callout; diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 349351d..1d02cf1 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -182,8 +182,8 @@ static const unsigned char compile_error_texts[] = "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" "invalid hyphen in option setting\0" /* 95 */ - "(*alpha_assertion) not recognized\0" - "script runs require Unicode support, which this version of PCRE2 does not have\0" + "(*alpha_assertion) not recognized\0" + "script runs require Unicode support, which this version of PCRE2 does not have\0" ; /* Match-time and UTF error texts are in the same format. */ diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 5669990..814d91b 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -525,10 +525,10 @@ bytes in a code unit in that mode. */ enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */ PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */ PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */ - + /* Values for the flags field in a match data block. */ -#define PCRE2_MD_COPIED_SUBJECT 0x01u +#define PCRE2_MD_COPIED_SUBJECT 0x01u /* Magic number to provide a small check against being handed junk. 
*/ @@ -1774,7 +1774,7 @@ typedef struct { uint8_t caseset; /* offset to multichar other cases or zero */ int32_t other_case; /* offset to other case, or zero if none */ int16_t scriptx; /* script extension value */ - int16_t dummy; /* spare - to round to multiple of 4 bytes */ + int16_t dummy; /* spare - to round to multiple of 4 bytes */ } ucd_record; /* UCD access macros */ diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 0061782..6519b4b 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -7794,12 +7794,12 @@ if (needstype || needsscript) OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); - + // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); ccbegin = cc; @@ -7848,7 +7848,7 @@ if (needstype || needsscript) //fprintf(stderr, "~~C\n"); OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); @@ -7862,12 +7862,12 @@ if (needstype || needsscript) // PH hacking //fprintf(stderr, "~~D\n"); OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - + OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); typereg = RETURN_ADDR; } @@ -9207,9 +9207,9 @@ if (common->utf && *cc == OP_REFI) OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, 
SLJIT_IMM, 3); - - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - + + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records)); OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case)); diff --git a/src/pcre2_maketables.c b/src/pcre2_maketables.c index d40c2f1..1c66579 100644 --- a/src/pcre2_maketables.c +++ b/src/pcre2_maketables.c @@ -138,7 +138,7 @@ for (i = 0; i < 256; i++) int x = 0; if (isspace(i)) x += ctype_space; if (isalpha(i)) x += ctype_letter; - if (islower(i)) x += ctype_lcletter; + if (islower(i)) x += ctype_lcletter; if (isdigit(i)) x += ctype_digit; if (isalnum(i) || i == '_') x += ctype_word; *p++ = x; diff --git a/src/pcre2_match_data.c b/src/pcre2_match_data.c index b480dec..ccc5f67 100644 --- a/src/pcre2_match_data.c +++ b/src/pcre2_match_data.c @@ -96,10 +96,10 @@ pcre2_match_data_free(pcre2_match_data *match_data) if (match_data != NULL) { if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) - match_data->memctl.free((void *)match_data->subject, + match_data->memctl.free((void *)match_data->subject, match_data->memctl.memory_data); match_data->memctl.free(match_data, match_data->memctl.memory_data); - } + } } diff --git a/src/pcre2_printint.c b/src/pcre2_printint.c index 03a18db..a4a7693 100644 --- a/src/pcre2_printint.c +++ b/src/pcre2_printint.c @@ -393,7 +393,7 @@ for(;;) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ONCE: - case OP_SCRIPT_RUN: + case OP_SCRIPT_RUN: case OP_COND: case OP_SCOND: case OP_REVERSE: diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 8d9eb9d..a39f38f 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -171,7 +171,7 @@ for (;;) /* Fall through */ case OP_ONCE: - case OP_SCRIPT_RUN: + case OP_SCRIPT_RUN: case OP_SBRA: case OP_BRAPOS: case OP_SBRAPOS: @@ -1076,7 +1076,7 @@ do case OP_CBRAPOS: case OP_SCBRAPOS: case OP_ONCE: - case OP_SCRIPT_RUN: + case OP_SCRIPT_RUN: case OP_ASSERT: rc = set_start_bits(re, 
tcode, utf); if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; diff --git a/src/pcre2posix.h b/src/pcre2posix.h index cb59d03..3a663b9 100644 --- a/src/pcre2posix.h +++ b/src/pcre2posix.h @@ -3,8 +3,8 @@ *************************************************/ /* PCRE2 is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. This is -the public header file to be #included by applications that call PCRE2 via the +and semantics are as close as possible to those of the Perl 5 language. This is +the public header file to be #included by applications that call PCRE2 via the POSIX wrapper interface. Written by Philip Hazel @@ -138,7 +138,7 @@ file. */ # endif #endif -/* The functions. The actual code is in functions with pcre2_xxx names for +/* The functions. The actual code is in functions with pcre2_xxx names for uniqueness. POSIX names are provided as macros for API compatibility with POSIX regex functions. It's done this way to ensure to they are always linked from the PCRE2 library and not by accident from elsewhere (regex_t differs in size @@ -155,7 +155,7 @@ PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *); #define regerror pcre2_regerror #define regfree pcre2_regfree -/* Debian had a patch that used different names. These are now here to save +/* Debian had a patch that used different names. These are now here to save them having to maintain their own patch, but are not documented by PCRE2. */ #define PCRE2regcomp pcre2_regcomp