Fix /x bug when pattern starts with whitespace or a #-comment followed by (?-x).
This commit is contained in:
parent
d71b70cdf7
commit
1f9b2a2e4b
|
@ -365,6 +365,12 @@ displaying fields containing NULLS:
|
||||||
(a) Within /x extended #-comments
|
(a) Within /x extended #-comments
|
||||||
(b) Within the "name" part of (*MARK) and other *verbs
|
(b) Within the "name" part of (*MARK) and other *verbs
|
||||||
(c) Within the text argument of a callout
|
(c) Within the text argument of a callout
|
||||||
|
|
||||||
|
108. If a pattern that was compiled with PCRE2_EXTENDED started with white
|
||||||
|
space or a #-type comment that was followed by (?-x), which turns off
|
||||||
|
PCRE2_EXTENDED, and there was no subsequent (?x) to turn it on again,
|
||||||
|
pcre2_compile() assumed that (?-x) applied to the whole pattern and
|
||||||
|
consequently mis-compiled it. This bug was found by the LLVM fuzzer.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
|
|
|
@ -6862,44 +6862,16 @@ for (;; ptr++)
|
||||||
newoptions = (options | set) & (~unset);
|
newoptions = (options | set) & (~unset);
|
||||||
|
|
||||||
/* If the options ended with ')' this is not the start of a nested
|
/* If the options ended with ')' this is not the start of a nested
|
||||||
group with option changes, so the options change at this level. If this
|
group with option changes, so the options change at this level. They
|
||||||
item is right at the start of the pattern, the options can be
|
must also be passed back for use in subsequent branches. Reset the
|
||||||
abstracted and made external in the pre-compile phase, and ignored in
|
greedy defaults and the case value for firstcu and reqcu. */
|
||||||
the compile phase. This can be helpful when matching -- for instance in
|
|
||||||
caseless checking of required bytes.
|
|
||||||
|
|
||||||
If the code pointer is not (cb->start_code + 1 + LINK_SIZE), we are
|
|
||||||
definitely *not* at the start of the pattern because something has been
|
|
||||||
compiled. In the pre-compile phase, however, the code pointer can have
|
|
||||||
that value after the start, because it gets reset as code is discarded
|
|
||||||
during the pre-compile. However, this can happen only at top level - if
|
|
||||||
we are within parentheses, the starting BRA will still be present. At
|
|
||||||
any parenthesis level, the length value can be used to test if anything
|
|
||||||
has been compiled at that level. Thus, a test for both these conditions
|
|
||||||
is necessary to ensure we correctly detect the start of the pattern in
|
|
||||||
both phases.
|
|
||||||
|
|
||||||
If we are not at the pattern start, reset the greedy defaults and the
|
|
||||||
case value for firstcu and reqcu. */
|
|
||||||
|
|
||||||
if (*ptr == CHAR_RIGHT_PARENTHESIS)
|
if (*ptr == CHAR_RIGHT_PARENTHESIS)
|
||||||
{
|
{
|
||||||
if (code == cb->start_code + 1 + LINK_SIZE &&
|
|
||||||
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
|
|
||||||
{
|
|
||||||
cb->external_options = newoptions;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
|
|
||||||
greedy_non_default = greedy_default ^ 1;
|
|
||||||
req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Change options at this level, and pass them back for use
|
|
||||||
in subsequent branches. */
|
|
||||||
|
|
||||||
*optionsptr = options = newoptions;
|
*optionsptr = options = newoptions;
|
||||||
|
greedy_default = ((newoptions & PCRE2_UNGREEDY) != 0);
|
||||||
|
greedy_non_default = greedy_default ^ 1;
|
||||||
|
req_caseopt = ((newoptions & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
||||||
previous = NULL; /* This item can't be repeated */
|
previous = NULL; /* This item can't be repeated */
|
||||||
continue; /* It is complete */
|
continue; /* It is complete */
|
||||||
}
|
}
|
||||||
|
|
|
@ -4724,4 +4724,15 @@ a)"xI
|
||||||
# /A(?#X\x00Y)B/
|
# /A(?#X\x00Y)B/
|
||||||
/41 28 3f 23 7b 00 7d 29 42/B,hex
|
/41 28 3f 23 7b 00 7d 29 42/B,hex
|
||||||
|
|
||||||
|
# Tests for leading comment in extended patterns
|
||||||
|
|
||||||
|
/ (?-x):?/extended
|
||||||
|
|
||||||
|
/(?-x):?/extended
|
||||||
|
|
||||||
|
/0b 28 3f 2d 78 29 3a/hex,extended
|
||||||
|
|
||||||
|
/#comment
|
||||||
|
(?-x):?/extended
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -431,8 +431,6 @@ Subject length lower bound = 2
|
||||||
|
|
||||||
/(?U)<.*>/I
|
/(?U)<.*>/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: ungreedy
|
|
||||||
First code unit = '<'
|
First code unit = '<'
|
||||||
Last code unit = '>'
|
Last code unit = '>'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
|
@ -459,8 +457,6 @@ Subject length lower bound = 3
|
||||||
|
|
||||||
/(?U)={3,}?/I
|
/(?U)={3,}?/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: ungreedy
|
|
||||||
First code unit = '='
|
First code unit = '='
|
||||||
Last code unit = '='
|
Last code unit = '='
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
@ -494,8 +490,6 @@ Failed: error 125 at offset 12: lookbehind assertion is not fixed length
|
||||||
|
|
||||||
/(?i)abc/I
|
/(?i)abc/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: caseless
|
|
||||||
First code unit = 'a' (caseless)
|
First code unit = 'a' (caseless)
|
||||||
Last code unit = 'c' (caseless)
|
Last code unit = 'c' (caseless)
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
@ -508,7 +502,7 @@ Subject length lower bound = 1
|
||||||
/(?i)^1234/I
|
/(?i)^1234/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: anchored caseless
|
Overall options: anchored
|
||||||
Subject length lower bound = 4
|
Subject length lower bound = 4
|
||||||
|
|
||||||
/(^b|(?i)^d)/I
|
/(^b|(?i)^d)/I
|
||||||
|
@ -521,7 +515,7 @@ Subject length lower bound = 1
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
May match empty string
|
May match empty string
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: anchored dotall
|
Overall options: anchored
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
|
|
||||||
/[abcd]/I
|
/[abcd]/I
|
||||||
|
@ -531,15 +525,11 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/(?i)[abcd]/I
|
/(?i)[abcd]/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: caseless
|
|
||||||
Starting code units: A B C D a b c d
|
Starting code units: A B C D a b c d
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
/(?m)[xy]|(b|c)/I
|
/(?m)[xy]|(b|c)/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
Compile options: <none>
|
|
||||||
Overall options: multiline
|
|
||||||
Starting code units: b c x y
|
Starting code units: b c x y
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
@ -551,8 +541,7 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/(?i)(^a|^b)/Im
|
/(?i)(^a|^b)/Im
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
Compile options: multiline
|
Options: multiline
|
||||||
Overall options: caseless multiline
|
|
||||||
First code unit at start or follows newline
|
First code unit at start or follows newline
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
@ -1153,7 +1142,7 @@ Subject length lower bound = 1
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: anchored dotall
|
Overall options: anchored
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
/(?s:.*X|^B)/IB
|
/(?s:.*X|^B)/IB
|
||||||
|
@ -2682,8 +2671,7 @@ No match
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: extended
|
Options: extended
|
||||||
Overall options: caseless extended
|
|
||||||
First code unit = 'a' (caseless)
|
First code unit = 'a' (caseless)
|
||||||
Last code unit = 'c' (caseless)
|
Last code unit = 'c' (caseless)
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
@ -2697,8 +2685,7 @@ Subject length lower bound = 3
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: extended
|
Options: extended
|
||||||
Overall options: caseless extended
|
|
||||||
First code unit = 'a' (caseless)
|
First code unit = 'a' (caseless)
|
||||||
Last code unit = 'c' (caseless)
|
Last code unit = 'c' (caseless)
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
@ -3043,8 +3030,6 @@ Subject length lower bound = 3
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: ungreedy
|
|
||||||
First code unit = 'x'
|
First code unit = 'x'
|
||||||
Last code unit = 'b'
|
Last code unit = 'b'
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 3
|
||||||
|
@ -3427,8 +3412,6 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
/(?i)[ab]/I
|
/(?i)[ab]/I
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
|
||||||
Overall options: caseless
|
|
||||||
Starting code units: A B a b
|
Starting code units: A B a b
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
@ -5841,7 +5824,7 @@ Named capturing subpatterns:
|
||||||
A 2
|
A 2
|
||||||
A 3
|
A 3
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: anchored dupnames
|
Overall options: anchored
|
||||||
Duplicate name status changes
|
Duplicate name status changes
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
a1b\=copy=A
|
a1b\=copy=A
|
||||||
|
@ -13734,7 +13717,7 @@ Subject length lower bound = 1
|
||||||
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
/(*NO_DOTSTAR_ANCHOR)(?s).*\d/info
|
||||||
Capturing subpattern count = 0
|
Capturing subpattern count = 0
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: dotall no_dotstar_anchor
|
Overall options: no_dotstar_anchor
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
'^(?:(a)|b)(?(1)A|B)'
|
'^(?:(a)|b)(?(1)A|B)'
|
||||||
|
@ -15060,4 +15043,15 @@ Subject length lower bound = 0
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Tests for leading comment in extended patterns
|
||||||
|
|
||||||
|
/ (?-x):?/extended
|
||||||
|
|
||||||
|
/(?-x):?/extended
|
||||||
|
|
||||||
|
/0b 28 3f 2d 78 29 3a/hex,extended
|
||||||
|
|
||||||
|
/#comment
|
||||||
|
(?-x):?/extended
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue