Fix dynamic options changing bug.

This commit is contained in:
Philip.Hazel 2018-08-04 08:20:18 +00:00
parent c722bf2399
commit 9332d4be69
4 changed files with 40 additions and 16 deletions

View File

@@ -140,6 +140,13 @@ generated by pcre2_maketables(), which uses isspace() to identify white space.
Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085,
U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by
Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl.
32. In certain circumstances, option settings within patterns were not being
correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
matched "ab". (During parsing, the (?m) setting lost track of the fact that
(?i) should be reset at the end of its group; without a later setting such as
(?m), the compile phase got it right.) This bug was introduced by the
refactoring in release 10.23.
Version 10.31 12-February-2018

View File

@@ -2284,11 +2284,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u #define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u #define NSF_CONDASSERT 0x0002u
/* Of the options that are changeable within the pattern, these are tracked /* Options that are changeable within the pattern must be tracked during
during parsing. The rest are used from META_OPTIONS items when compiling. */ parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
but all must be tracked so that META_OPTIONS items set the correct values for
the main compiling phase. */
#define PARSE_TRACKED_OPTIONS \ #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
(PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE) PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values /* States used for analyzing ranges in character classes. The two OK values
must be last. */ must be last. */
@@ -2468,16 +2471,16 @@ while (ptr < ptrend)
/* EITHER: not both options set */ /* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
/* OR: character > 255 AND not Unicode Pattern White Space */ /* OR: character > 255 AND not Unicode Pattern White Space */
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) || (c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
#endif #endif
/* OR: not a # comment or isspace() white space */ /* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0 (c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */ /* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL && c != CHAR_NEL
#endif #endif
))) )))
{ {
PCRE2_SIZE verbnamelength; PCRE2_SIZE verbnamelength;
@@ -2562,16 +2565,16 @@ while (ptr < ptrend)
character, not a code unit, so we must not use MAX_255 to test its size character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by whitespace characters are those designated as "Pattern White Space" by
Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */ subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0) if ((options & PCRE2_EXTENDED) != 0)
{ {
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue; if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
#endif #endif
if (c == CHAR_NUMBER_SIGN) if (c == CHAR_NUMBER_SIGN)
{ {
while (ptr < ptrend) while (ptr < ptrend)
@@ -3590,6 +3593,8 @@ while (ptr < ptrend)
else else
{ {
BOOL hyphenok = TRUE; BOOL hyphenok = TRUE;
uint32_t oldoptions = options;
top_nest->reset_group = 0; top_nest->reset_group = 0;
top_nest->max_group = 0; top_nest->max_group = 0;
set = unset = 0; set = unset = 0;
@@ -3602,7 +3607,7 @@ while (ptr < ptrend)
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE); PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
hyphenok = FALSE; hyphenok = FALSE;
ptr++; ptr++;
} }
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS && while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
@@ -3618,7 +3623,7 @@ while (ptr < ptrend)
goto FAILED; goto FAILED;
} }
optset = &unset; optset = &unset;
hyphenok = FALSE; hyphenok = FALSE;
break; break;
case CHAR_J: /* Record that it changed in the external options */ case CHAR_J: /* Record that it changed in the external options */
@@ -3677,10 +3682,9 @@ while (ptr < ptrend)
} }
else *parsed_pattern++ = META_NOCAPTURE; else *parsed_pattern++ = META_NOCAPTURE;
/* If nothing changed, no need to record. The check of hyphenok catches /* If nothing changed, no need to record. */
the (?^) case. */
if (set != 0 || unset != 0 || !hyphenok) if (options != oldoptions)
{ {
*parsed_pattern++ = META_OPTIONS; *parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options; *parsed_pattern++ = options;

5
testdata/testinput1 vendored
View File

@@ -2184,6 +2184,11 @@
Blah blah Blah blah
blaH blah blaH blah
/((?i)blah)\s+(?m)A(?i:\1)/
blah ABLAH
\= Expect no match
blah aBLAH
/(?>a*)*/ /(?>a*)*/
a a
aa aa

View File

@@ -3346,6 +3346,14 @@ No match
0: blaH blah 0: blaH blah
1: blaH 1: blaH
/((?i)blah)\s+(?m)A(?i:\1)/
blah ABLAH
0: blah ABLAH
1: blah
\= Expect no match
blah aBLAH
No match
/(?>a*)*/ /(?>a*)*/
a a
0: a 0: a