Don't ignore {1}+ when it is applied to a parenthesized item.
This commit is contained in:
parent
ef79b978a6
commit
da5155fed3
|
@ -61,6 +61,11 @@ addition (a) the default limit for groups requested by -o<n> has been raised to
|
||||||
50, (b) the new --om-capture option changes the limit, (c) an error is raised
|
50, (b) the new --om-capture option changes the limit, (c) an error is raised
|
||||||
if -o asks for a group that is above the limit.
|
if -o asks for a group that is above the limit.
|
||||||
|
|
||||||
|
12. The quantifier {1} was always being ignored, but this is incorrect when it
|
||||||
|
is made possessive and applied to an item in parentheses, because a
|
||||||
|
parenthesized item may contain multiple branches or other backtracking points,
|
||||||
|
for example /(a|ab){1}+c/ or /(a+){1}+a/.
|
||||||
|
|
||||||
|
|
||||||
Version 10.33 16-April-2019
|
Version 10.33 16-April-2019
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -6758,10 +6758,6 @@ for (;; pptr++)
|
||||||
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
|
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
|
||||||
op_type = 0;
|
op_type = 0;
|
||||||
|
|
||||||
/* If the repeat is {1} we can ignore it. */
|
|
||||||
|
|
||||||
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
|
||||||
|
|
||||||
/* Adjust first and required code units for a zero repeat. */
|
/* Adjust first and required code units for a zero repeat. */
|
||||||
|
|
||||||
if (repeat_min == 0)
|
if (repeat_min == 0)
|
||||||
|
@ -6804,7 +6800,10 @@ for (;; pptr++)
|
||||||
tempcode = previous;
|
tempcode = previous;
|
||||||
op_previous = *previous;
|
op_previous = *previous;
|
||||||
|
|
||||||
/* Now handle repetition for the different types of item. */
|
/* Now handle repetition for the different types of item. If the repeat
|
||||||
|
minimum and the repeat maximum are both 1, we can ignore the quantifier for
|
||||||
|
non-parenthesized items, as they have only one alternative. For anything in
|
||||||
|
parentheses, we must not ignore it if {1} is possessive. */
|
||||||
|
|
||||||
switch (op_previous)
|
switch (op_previous)
|
||||||
{
|
{
|
||||||
|
@ -6818,6 +6817,7 @@ for (;; pptr++)
|
||||||
case OP_CHARI:
|
case OP_CHARI:
|
||||||
case OP_NOT:
|
case OP_NOT:
|
||||||
case OP_NOTI:
|
case OP_NOTI:
|
||||||
|
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||||
op_type = chartypeoffset[op_previous - OP_CHAR];
|
op_type = chartypeoffset[op_previous - OP_CHAR];
|
||||||
|
|
||||||
/* Deal with UTF characters that take up more than one code unit. */
|
/* Deal with UTF characters that take up more than one code unit. */
|
||||||
|
@ -6864,6 +6864,7 @@ for (;; pptr++)
|
||||||
code = previous;
|
code = previous;
|
||||||
goto END_REPEAT;
|
goto END_REPEAT;
|
||||||
}
|
}
|
||||||
|
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||||
|
|
||||||
if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
|
if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
|
||||||
*code++ = OP_CRSTAR + repeat_type;
|
*code++ = OP_CRSTAR + repeat_type;
|
||||||
|
@ -6898,6 +6899,8 @@ for (;; pptr++)
|
||||||
repetition. */
|
repetition. */
|
||||||
|
|
||||||
case OP_RECURSE:
|
case OP_RECURSE:
|
||||||
|
if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
|
||||||
|
goto END_REPEAT;
|
||||||
|
|
||||||
/* Generate unwrapped repeats for a non-zero minimum, except when the
|
/* Generate unwrapped repeats for a non-zero minimum, except when the
|
||||||
minimum is 1 and the maximum unlimited, because that can be handled with
|
minimum is 1 and the maximum unlimited, because that can be handled with
|
||||||
|
@ -6980,6 +6983,9 @@ for (;; pptr++)
|
||||||
PCRE2_UCHAR *bralink = NULL;
|
PCRE2_UCHAR *bralink = NULL;
|
||||||
PCRE2_UCHAR *brazeroptr = NULL;
|
PCRE2_UCHAR *brazeroptr = NULL;
|
||||||
|
|
||||||
|
if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
|
||||||
|
goto END_REPEAT;
|
||||||
|
|
||||||
/* Repeating a DEFINE group (or any group where the condition is always
|
/* Repeating a DEFINE group (or any group where the condition is always
|
||||||
FALSE and there is only one branch) is pointless, but Perl allows the
|
FALSE and there is only one branch) is pointless, but Perl allows the
|
||||||
syntax, so we just ignore the repeat. */
|
syntax, so we just ignore the repeat. */
|
||||||
|
@ -7196,11 +7202,12 @@ for (;; pptr++)
|
||||||
and SCRIPT_RUN groups at runtime, but in a different way.]
|
and SCRIPT_RUN groups at runtime, but in a different way.]
|
||||||
|
|
||||||
Then, if the quantifier was possessive and the bracket is not a
|
Then, if the quantifier was possessive and the bracket is not a
|
||||||
conditional, we convert the BRA code to the POS form, and the KET code to
|
conditional, we convert the BRA code to the POS form, and the KET code
|
||||||
KETRPOS. (It turns out to be convenient at runtime to detect this kind of
|
to KETRPOS. (It turns out to be convenient at runtime to detect this
|
||||||
subpattern at both the start and at the end.) The use of special opcodes
|
kind of subpattern at both the start and at the end.) The use of
|
||||||
makes it possible to reduce greatly the stack usage in pcre2_match(). If
|
special opcodes makes it possible to reduce greatly the stack usage in
|
||||||
the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
|
pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to
|
||||||
|
OP_BRAPOSZERO.
|
||||||
|
|
||||||
Then, if the minimum number of matches is 1 or 0, cancel the possessive
|
Then, if the minimum number of matches is 1 or 0, cancel the possessive
|
||||||
flag so that the default action below, of wrapping everything inside
|
flag so that the default action below, of wrapping everything inside
|
||||||
|
@ -7301,6 +7308,8 @@ for (;; pptr++)
|
||||||
int prop_type, prop_value;
|
int prop_type, prop_value;
|
||||||
PCRE2_UCHAR *oldcode;
|
PCRE2_UCHAR *oldcode;
|
||||||
|
|
||||||
|
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
|
||||||
|
|
||||||
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
|
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
|
||||||
mclength = 0; /* Not a character */
|
mclength = 0; /* Not a character */
|
||||||
|
|
||||||
|
|
|
@ -6351,4 +6351,18 @@ ef) x/x,mark
|
||||||
acb
|
acb
|
||||||
abc
|
abc
|
||||||
|
|
||||||
|
/(?:a|ab){1}+c/
|
||||||
|
\= Expect no match
|
||||||
|
abc
|
||||||
|
|
||||||
|
/(a|ab){1}+c/
|
||||||
|
abc
|
||||||
|
|
||||||
|
/(a+){1}+a/
|
||||||
|
\= Expect no match
|
||||||
|
aaaa
|
||||||
|
|
||||||
|
/(?(DEFINE)(a|ab))(?1){1}+c/
|
||||||
|
abc
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -10063,4 +10063,22 @@ MK: 2
|
||||||
0: a
|
0: a
|
||||||
MK: 2
|
MK: 2
|
||||||
|
|
||||||
|
/(?:a|ab){1}+c/
|
||||||
|
\= Expect no match
|
||||||
|
abc
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(a|ab){1}+c/
|
||||||
|
abc
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(a+){1}+a/
|
||||||
|
\= Expect no match
|
||||||
|
aaaa
|
||||||
|
No match
|
||||||
|
|
||||||
|
/(?(DEFINE)(a|ab))(?1){1}+c/
|
||||||
|
abc
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
Loading…
Reference in New Issue