Don't ignore {1}+ when it is applied to a parenthesized item.

This commit is contained in:
Philip.Hazel 2019-06-19 16:27:50 +00:00
parent ef79b978a6
commit da5155fed3
4 changed files with 57 additions and 11 deletions

View File

@ -61,6 +61,11 @@ addition (a) the default limit for groups requested by -o<n> has been raised to
50, (b) the new --om-capture option changes the limit, (c) an error is raised
if -o asks for a group that is above the limit.
12. The quantifier {1} was always being ignored, but this is incorrect when it
is made possessive and applied to an item in parentheses, because a
parenthesized item may contain multiple branches or other backtracking points,
for example /(a|ab){1}+c/ or /(a+){1}+a/.
Version 10.33 16-April-2019
---------------------------

View File

@ -6758,10 +6758,6 @@ for (;; pptr++)
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;
op_type = 0;
/* If the repeat is {1} we can ignore it. */
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
/* Adjust first and required code units for a zero repeat. */
if (repeat_min == 0)
@ -6804,7 +6800,10 @@ for (;; pptr++)
tempcode = previous;
op_previous = *previous;
/* Now handle repetition for the different types of item. */
/* Now handle repetition for the different types of item. If the repeat
minimum and the repeat maximum are both 1, we can ignore the quantifier for
non-parenthesized items, as they have only one alternative. For anything in
parentheses, and for a recursion, we must not ignore the quantifier if it is
possessive, that is, {1}+. */
switch (op_previous)
{
@ -6818,6 +6817,7 @@ for (;; pptr++)
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
op_type = chartypeoffset[op_previous - OP_CHAR];
/* Deal with UTF characters that take up more than one code unit. */
@ -6864,6 +6864,7 @@ for (;; pptr++)
code = previous;
goto END_REPEAT;
}
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
if (repeat_min == 0 && repeat_max == REPEAT_UNLIMITED)
*code++ = OP_CRSTAR + repeat_type;
@ -6898,6 +6899,8 @@ for (;; pptr++)
repetition. */
case OP_RECURSE:
if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
goto END_REPEAT;
/* Generate unwrapped repeats for a non-zero minimum, except when the
minimum is 1 and the maximum unlimited, because that can be handled with
@ -6980,6 +6983,9 @@ for (;; pptr++)
PCRE2_UCHAR *bralink = NULL;
PCRE2_UCHAR *brazeroptr = NULL;
if (repeat_max == 1 && repeat_min == 1 && !possessive_quantifier)
goto END_REPEAT;
/* Repeating a DEFINE group (or any group where the condition is always
FALSE and there is only one branch) is pointless, but Perl allows the
syntax, so we just ignore the repeat. */
@ -7196,11 +7202,12 @@ for (;; pptr++)
and SCRIPT_RUN groups at runtime, but in a different way.]
Then, if the quantifier was possessive and the bracket is not a
conditional, we convert the BRA code to the POS form, and the KET code to
KETRPOS. (It turns out to be convenient at runtime to detect this kind of
subpattern at both the start and at the end.) The use of special opcodes
makes it possible to reduce greatly the stack usage in pcre2_match(). If
the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO.
conditional, we convert the BRA code to the POS form, and the KET code
to KETRPOS. (It turns out to be convenient at runtime to detect this
kind of subpattern at both the start and at the end.) The use of
special opcodes makes it possible to reduce greatly the stack usage in
pcre2_match(). If the group is preceded by OP_BRAZERO, convert this to
OP_BRAPOSZERO.
Then, if the minimum number of matches is 1 or 0, cancel the possessive
flag so that the default action below, of wrapping everything inside
@ -7301,6 +7308,8 @@ for (;; pptr++)
int prop_type, prop_value;
PCRE2_UCHAR *oldcode;
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */
mclength = 0; /* Not a character */

14
testdata/testinput1 vendored
View File

@ -6351,4 +6351,18 @@ ef) x/x,mark
acb
abc
/(?:a|ab){1}+c/
\= Expect no match
abc
/(a|ab){1}+c/
abc
/(a+){1}+a/
\= Expect no match
aaaa
/(?(DEFINE)(a|ab))(?1){1}+c/
abc
# End of testinput1

18
testdata/testoutput1 vendored
View File

@ -10063,4 +10063,22 @@ MK: 2
0: a
MK: 2
/(?:a|ab){1}+c/
\= Expect no match
abc
No match
/(a|ab){1}+c/
abc
No match
/(a+){1}+a/
\= Expect no match
aaaa
No match
/(?(DEFINE)(a|ab))(?1){1}+c/
abc
No match
# End of testinput1