Fix /x bug when pattern starts with whitespace or a #-comment followed by (?-x).
This commit is contained in:
parent
d71b70cdf7
commit
1f9b2a2e4b
|
@ -365,6 +365,12 @@ displaying fields containing NULLS:
|
|||
(a) Within /x extended #-comments
|
||||
(b) Within the "name" part of (*MARK) and other *verbs
|
||||
(c) Within the text argument of a callout
|
||||
|
||||
108. If a pattern that was compiled with PCRE2_EXTENDED started with white
|
||||
space or a #-type comment that was followed by (?-x), which turns off
|
||||
PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
|
||||
pcre2_compile() assumed that (?-x) applied to the whole pattern and
|
||||
consequently mis-compiled it. This bug was found by the LLVM fuzzer.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
|
|
|
@ -6862,44 +6862,16 @@ for (;; ptr++)
|
|||
newoptions = (options | set) & (~unset);
|
||||
|
||||
/* If the options ended with ')' this is not the start of a nested
|
||||
group with option changes, so the options change at this level. If this
|
||||
item is right at the start of the pattern, the options can be
|
||||
abstracted and made external in the pre-compile phase, and ignored in
|
||||
the compile phase. This can be helpful when matching -- for instance in
|
||||
caseless checking of required bytes.
|
||||
|
||||
If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are
|
||||
definitely *not* at the start of the pattern because something has been
|
||||
compiled. In the pre-compile phase, however, the code pointer can have
|
||||
that value after the start, because it gets reset as code is discarded
|
||||
during the pre-compile. However, this can happen only at top level - if
|
||||
we are within parentheses, the starting BRA will still be present. At
|
||||
any parenthesis level, the length value can be used to test if anything
|
||||
has been compiled at that level. Thus, a test for both these conditions
|
||||
is necessary to ensure we correctly detect the start of the pattern in
|
||||
both phases.
|
||||
|
||||
If we are not at the pattern start, reset the greedy defaults and the
|
||||
case value for firstcu and reqcu. */
|
||||
group with option changes, so the options change at this level. They
|
||||
must also be passed back for use in subsequent branches. Reset the
|
||||
greedy defaults and the case value for firstcu and reqcu. */
|
||||
|
||||
if (*ptr == CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
if (code == cb->start_code + 1 + LINK_SIZE &&
|
||||
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
|
||||
{
|
||||
cb->external_options = newoptions;
|
||||
}
|
||||
else
|
||||
{
|
||||
greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
||||
}
|
||||
|
||||
/* Change options at this level, and pass them back for use
|
||||
in subsequent branches. */
|
||||
|
||||
*optionsptr = options = newoptions;
|
||||
greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
|
||||
greedy_non_default = greedy_default ^ 1;
|
||||
req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
||||
previous = NULL; /* This item can't be repeated */
|
||||
continue; /* It is complete */
|
||||
}
|
||||
|
|
|
@ -4724,4 +4724,15 @@ a)"xI
|
|||
# /A(?#X\x00Y)B/
|
||||
/41 28 3f 23 7b 00 7d 29 42/B,hex
|
||||
|
||||
# Tests for leading comment in extended patterns
|
||||
|
||||
/ (?-x):?/extended
|
||||
|
||||
/(?-x):?/extended
|
||||
|
||||
/0b 28 3f 2d 78 29 3a/hex,extended
|
||||
|
||||
/#comment
|
||||
(?-x):?/extended
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -431,8 +431,6 @@ Subject length lower bound = 2
|
|||
|
||||
/(?U)<.*>/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: ungreedy
|
||||
First code unit = '<'
|
||||
Last code unit = '>'
|
||||
Subject length lower bound = 2
|
||||
|
@ -459,8 +457,6 @@ Subject length lower bound = 3
|
|||
|
||||
/(?U)={3,}?/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: ungreedy
|
||||
First code unit = '='
|
||||
Last code unit = '='
|
||||
Subject length lower bound = 3
|
||||
|
@ -494,8 +490,6 @@ Failed: error 125 at offset 12: lookbehind assertion is not fixed length
|
|||
|
||||
/(?i)abc/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: caseless
|
||||
First code unit = 'a' (caseless)
|
||||
Last code unit = 'c' (caseless)
|
||||
Subject length lower bound = 3
|
||||
|
@ -508,7 +502,7 @@ Subject length lower bound = 1
|
|||
/(?i)^1234/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored caseless
|
||||
Overall options: anchored
|
||||
Subject length lower bound = 4
|
||||
|
||||
/(^b|(?i)^d)/I
|
||||
|
@ -521,7 +515,7 @@ Subject length lower bound = 1
|
|||
Capturing subpattern count = 0
|
||||
May match empty string
|
||||
Compile options: <none>
|
||||
Overall options: anchored dotall
|
||||
Overall options: anchored
|
||||
Subject length lower bound = 0
|
||||
|
||||
/[abcd]/I
|
||||
|
@ -531,15 +525,11 @@ Subject length lower bound = 1
|
|||
|
||||
/(?i)[abcd]/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: caseless
|
||||
Starting code units: A B C D a b c d
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?m)[xy]|(b|c)/I
|
||||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: multiline
|
||||
Starting code units: b c x y
|
||||
Subject length lower bound = 1
|
||||
|
||||
|
@ -551,8 +541,7 @@ Subject length lower bound = 1
|
|||
|
||||
/(?i)(^a|^b)/Im
|
||||
Capturing subpattern count = 1
|
||||
Compile options: multiline
|
||||
Overall options: caseless multiline
|
||||
Options: multiline
|
||||
First code unit at start or follows newline
|
||||
Subject length lower bound = 1
|
||||
|
||||
|
@ -1153,7 +1142,7 @@ Subject length lower bound = 1
|
|||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 1
|
||||
Compile options: <none>
|
||||
Overall options: anchored dotall
|
||||
Overall options: anchored
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?s:.*X|^B)/IB
|
||||
|
@ -2682,8 +2671,7 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: extended
|
||||
Overall options: caseless extended
|
||||
Options: extended
|
||||
First code unit = 'a' (caseless)
|
||||
Last code unit = 'c' (caseless)
|
||||
Subject length lower bound = 3
|
||||
|
@ -2697,8 +2685,7 @@ Subject length lower bound = 3
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: extended
|
||||
Overall options: caseless extended
|
||||
Options: extended
|
||||
First code unit = 'a' (caseless)
|
||||
Last code unit = 'c' (caseless)
|
||||
Subject length lower bound = 3
|
||||
|
@ -3043,8 +3030,6 @@ Subject length lower bound = 3
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: ungreedy
|
||||
First code unit = 'x'
|
||||
Last code unit = 'b'
|
||||
Subject length lower bound = 3
|
||||
|
@ -3427,8 +3412,6 @@ Subject length lower bound = 1
|
|||
|
||||
/(?i)[ab]/I
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: caseless
|
||||
Starting code units: A B a b
|
||||
Subject length lower bound = 1
|
||||
|
||||
|
@ -5841,7 +5824,7 @@ Named capturing subpatterns:
|
|||
A 2
|
||||
A 3
|
||||
Compile options: <none>
|
||||
Overall options: anchored dupnames
|
||||
Overall options: anchored
|
||||
Duplicate name status changes
|
||||
Subject length lower bound = 2
|
||||
a1b\=copy=A
|
||||
|
@ -13734,7 +13717,7 @@ Subject length lower bound = 1
|
|||
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
||||
Capturing subpattern count = 0
|
||||
Compile options: <none>
|
||||
Overall options: dotall no_dotstar_anchor
|
||||
Overall options: no_dotstar_anchor
|
||||
Subject length lower bound = 1
|
||||
|
||||
'^(?:(a)|b)(?(1)A|B)'
|
||||
|
@ -15060,4 +15043,15 @@ Subject length lower bound = 0
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# Tests for leading comment in extended patterns
|
||||
|
||||
/ (?-x):?/extended
|
||||
|
||||
/(?-x):?/extended
|
||||
|
||||
/0b 28 3f 2d 78 29 3a/hex,extended
|
||||
|
||||
/#comment
|
||||
(?-x):?/extended
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue