Fix dynamic options changing bug.

This commit is contained in:
Philip.Hazel 2018-08-04 08:20:18 +00:00
parent c722bf2399
commit 9332d4be69
4 changed files with 40 additions and 16 deletions

View File

@ -140,6 +140,13 @@ generated by pcre2_maketables(), which uses isspace() to identify white space.
Now, when Unicode support is compiled, PCRE2_EXTENDED also discards U+0085,
U+200E, U+200F, U+2028, and U+2029, which are additional characters defined by
Unicode as "Pattern White Space". This makes PCRE2 compatible with Perl.
32. In certain circumstances, option settings within patterns were not being
correctly processed. For example, the pattern /((?i)A)(?m)B/ incorrectly
matched "ab". (The (?m) setting lost the fact that (?i) should be reset at the
end of its group during the parse process, but without another setting such as
(?m) the compile phase got it right.) This bug was introduced by the
refactoring in release 10.23.
Version 10.31 12-February-2018

View File

@ -2284,11 +2284,14 @@ typedef struct nest_save {
#define NSF_RESET 0x0001u
#define NSF_CONDASSERT 0x0002u
/* Of the options that are changeable within the pattern, these are tracked
during parsing. The rest are used from META_OPTIONS items when compiling. */
/* Options that are changeable within the pattern must be tracked during
parsing. Some (e.g. PCRE2_EXTENDED) are implemented entirely during parsing,
but all must be tracked so that META_OPTIONS items set the correct values for
the main compiling phase. */
#define PARSE_TRACKED_OPTIONS \
(PCRE2_DUPNAMES|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_NO_AUTO_CAPTURE)
#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
PCRE2_UNGREEDY)
/* States used for analyzing ranges in character classes. The two OK values
must be last. */
@ -2468,16 +2471,16 @@ while (ptr < ptrend)
/* EITHER: not both options set */
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
#ifdef SUPPORT_UNICODE
#ifdef SUPPORT_UNICODE
/* OR: character > 255 AND not Unicode Pattern White Space */
(c > 255 && (c|1) != 0x200f && (c|1) != 0x2029) ||
#endif
#endif
/* OR: not a # comment or isspace() white space */
(c < 256 && c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0
#ifdef SUPPORT_UNICODE
/* and not CHAR_NEL when Unicode is supported */
&& c != CHAR_NEL
#endif
#endif
)))
{
PCRE2_SIZE verbnamelength;
@ -2562,16 +2565,16 @@ while (ptr < ptrend)
character, not a code unit, so we must not use MAX_255 to test its size
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. The
whitespace characters are those designated as "Pattern White Space" by
Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
Unicode, which are the isspace() characters plus CHAR_NEL (newline), which is
U+0085 in Unicode, plus U+200E, U+200F, U+2028, and U+2029. These are a
subset of space characters that match \h and \v. */
if ((options & PCRE2_EXTENDED) != 0)
{
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
#ifdef SUPPORT_UNICODE
#ifdef SUPPORT_UNICODE
if (c == CHAR_NEL || (c|1) == 0x200f || (c|1) == 0x2029) continue;
#endif
#endif
if (c == CHAR_NUMBER_SIGN)
{
while (ptr < ptrend)
@ -3590,6 +3593,8 @@ while (ptr < ptrend)
else
{
BOOL hyphenok = TRUE;
uint32_t oldoptions = options;
top_nest->reset_group = 0;
top_nest->max_group = 0;
set = unset = 0;
@ -3602,7 +3607,7 @@ while (ptr < ptrend)
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
hyphenok = FALSE;
ptr++;
ptr++;
}
while (ptr < ptrend && *ptr != CHAR_RIGHT_PARENTHESIS &&
@ -3618,7 +3623,7 @@ while (ptr < ptrend)
goto FAILED;
}
optset = &unset;
hyphenok = FALSE;
hyphenok = FALSE;
break;
case CHAR_J: /* Record that it changed in the external options */
@ -3677,10 +3682,9 @@ while (ptr < ptrend)
}
else *parsed_pattern++ = META_NOCAPTURE;
/* If nothing changed, no need to record. The check of hyphenok catches
the (?^) case. */
/* If nothing changed, no need to record. */
if (set != 0 || unset != 0 || !hyphenok)
if (options != oldoptions)
{
*parsed_pattern++ = META_OPTIONS;
*parsed_pattern++ = options;

5
testdata/testinput1 vendored
View File

@ -2184,6 +2184,11 @@
Blah blah
blaH blah
/((?i)blah)\s+(?m)A(?i:\1)/
blah ABLAH
\= Expect no match
blah aBLAH
/(?>a*)*/
a
aa

View File

@ -3346,6 +3346,14 @@ No match
0: blaH blah
1: blaH
/((?i)blah)\s+(?m)A(?i:\1)/
blah ABLAH
0: blah ABLAH
1: blah
\= Expect no match
blah aBLAH
No match
/(?>a*)*/
a
0: a