Fix auto-possessification bug at the end of a capturing group that is called
recursively.
This commit is contained in:
parent
7a5b962509
commit
53a588431c
|
@ -134,6 +134,15 @@ groups, making the ovector larger than this. The number has been increased to
|
|||
131072, which allows for the maximum number of captures (65535) plus the
|
||||
overall match. This fixes oss-fuzz issue 5415.
|
||||
|
||||
31. Auto-possessification at the end of a capturing group was dependent on what
|
||||
follows the group (e.g. /(a+)b/ would auto-possessify the a+) but this caused
|
||||
incorrect behaviour when the group was called recursively from elsewhere in the
|
||||
pattern where something different might follow. This bug is an unforseen
|
||||
consequence of change #1 for 10.30 - the implementation of backtracking into
|
||||
recursions. Iterators at the ends of capturing groups are no longer considered
|
||||
for auto-possessification if the pattern contains any recursions. Fixes
|
||||
Bugzilla #2232.
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
|
|
@ -558,47 +558,73 @@ for(;;)
|
|||
continue;
|
||||
}
|
||||
|
||||
/* At the end of a branch, skip to the end of the group. */
|
||||
|
||||
if (c == OP_ALT)
|
||||
{
|
||||
do code += GET(code, 1); while (*code == OP_ALT);
|
||||
c = *code;
|
||||
}
|
||||
|
||||
/* Inspect the next opcode. */
|
||||
|
||||
switch(c)
|
||||
{
|
||||
case OP_END:
|
||||
case OP_KETRPOS:
|
||||
/* TRUE only in greedy case. The non-greedy case could be replaced by
|
||||
an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
|
||||
uses more memory, which we cannot get at this stage.) */
|
||||
/* We can always possessify a greedy iterator at the end of the pattern,
|
||||
which is reached after skipping over the final OP_KET. A non-greedy
|
||||
iterator must never be possessified. */
|
||||
|
||||
case OP_END:
|
||||
return base_list[1] != 0;
|
||||
|
||||
/* When an iterator is at the end of certain kinds of group we can inspect
|
||||
what follows the group by skipping over the closing ket. Note that this
|
||||
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
|
||||
iteration is variable (could be another iteration or could be the next
|
||||
item). As these two opcodes are not listed in the next switch, they will
|
||||
end up as the next code to inspect, and return FALSE by virtue of being
|
||||
unsupported. */
|
||||
|
||||
case OP_KET:
|
||||
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
|
||||
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
|
||||
cannot be converted to a possessive form. */
|
||||
case OP_KETRPOS:
|
||||
/* The non-greedy case cannot be converted to a possessive form. */
|
||||
|
||||
if (base_list[1] == 0) return FALSE;
|
||||
|
||||
/* If the bracket is capturing it might be referenced by an OP_RECURSE
|
||||
so its last iterator can never be possessified if the pattern contains
|
||||
recursions. (This could be improved by keeping a list of group numbers that
|
||||
are called by recursion.) */
|
||||
|
||||
switch(*(code - GET(code, 1)))
|
||||
{
|
||||
case OP_CBRA:
|
||||
case OP_SCBRA:
|
||||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
if (cb->had_recurse) return FALSE;
|
||||
break;
|
||||
|
||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||
last iterator. However, if the group was entered as a result of checking
|
||||
a previous iterator, this is not possible. */
|
||||
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NOT:
|
||||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
|
||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||
last iterator. However, if the group was entered as a result of checking
|
||||
a previous iterator, this is not possible. */
|
||||
|
||||
return !entered_a_group;
|
||||
}
|
||||
|
||||
/* Skip over the bracket and inspect what comes next. */
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* Handle cases where the next item is a group. */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_BRA:
|
||||
case OP_CBRA:
|
||||
|
@ -637,11 +663,15 @@ for(;;)
|
|||
code += PRIV(OP_lengths)[c];
|
||||
continue;
|
||||
|
||||
/* The next opcode does not need special handling; fall through and use it
|
||||
to see if the base can be possessified. */
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for a supported opcode, and load its properties. */
|
||||
/* We now have the next appropriate opcode to compare with the base. Check
|
||||
for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
|
|
@ -6159,4 +6159,34 @@ ef) x/x,mark
|
|||
/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/
|
||||
XYYZ
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/
|
||||
aa
|
||||
a
|
||||
|
||||
/^(a?)b(?1)a/
|
||||
abaa
|
||||
aba
|
||||
baa
|
||||
ba
|
||||
|
||||
/^(a?)+b(?1)a/
|
||||
abaa
|
||||
aba
|
||||
baa
|
||||
ba
|
||||
|
||||
/^(a?)++b(?1)a/
|
||||
abaa
|
||||
aba
|
||||
baa
|
||||
ba
|
||||
|
||||
/^(a?)+b/
|
||||
b
|
||||
ab
|
||||
aaab
|
||||
|
||||
/(?=a+)a(a+)++b/
|
||||
aab
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -5412,4 +5412,21 @@ a)"xI
|
|||
\= Expect no match
|
||||
\na
|
||||
|
||||
# These tests are matched in test 1 as they are Perl compatible. Here we are
|
||||
# looking at what does and does not get auto-possessified.
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B
|
||||
|
||||
/^(a?)b(?1)a/B
|
||||
|
||||
/^(a?)+b(?1)a/B
|
||||
|
||||
/^(a?)++b(?1)a/B
|
||||
|
||||
/^(a?)+b/B
|
||||
|
||||
/(?=a+)a(a+)++b/B
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -9758,4 +9758,68 @@ No match
|
|||
1: Y
|
||||
2:
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/
|
||||
aa
|
||||
0: aa
|
||||
a
|
||||
0: a
|
||||
|
||||
/^(a?)b(?1)a/
|
||||
abaa
|
||||
0: abaa
|
||||
1: a
|
||||
aba
|
||||
0: aba
|
||||
1: a
|
||||
baa
|
||||
0: baa
|
||||
1:
|
||||
ba
|
||||
0: ba
|
||||
1:
|
||||
|
||||
/^(a?)+b(?1)a/
|
||||
abaa
|
||||
0: abaa
|
||||
1:
|
||||
aba
|
||||
0: aba
|
||||
1:
|
||||
baa
|
||||
0: baa
|
||||
1:
|
||||
ba
|
||||
0: ba
|
||||
1:
|
||||
|
||||
/^(a?)++b(?1)a/
|
||||
abaa
|
||||
0: abaa
|
||||
1:
|
||||
aba
|
||||
0: aba
|
||||
1:
|
||||
baa
|
||||
0: baa
|
||||
1:
|
||||
ba
|
||||
0: ba
|
||||
1:
|
||||
|
||||
/^(a?)+b/
|
||||
b
|
||||
0: b
|
||||
1:
|
||||
ab
|
||||
0: ab
|
||||
1:
|
||||
aaab
|
||||
0: aaab
|
||||
1:
|
||||
|
||||
/(?=a+)a(a+)++b/
|
||||
aab
|
||||
0: aab
|
||||
1: a
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -12701,7 +12701,7 @@ Subject length lower bound = 5
|
|||
Ket
|
||||
a
|
||||
CBraPos 1
|
||||
a++
|
||||
a+
|
||||
KetRpos
|
||||
a
|
||||
Ket
|
||||
|
@ -16468,6 +16468,113 @@ No match
|
|||
\na
|
||||
No match
|
||||
|
||||
# These tests are matched in test 1 as they are Perl compatible. Here we are
|
||||
# looking at what does and does not get auto-possessified.
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Cond
|
||||
Cond false
|
||||
CBra 1
|
||||
a?
|
||||
Ket
|
||||
Ket
|
||||
^
|
||||
Recurse
|
||||
a
|
||||
$
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Cond
|
||||
Cond false
|
||||
CBra 1
|
||||
a?
|
||||
Ket
|
||||
X
|
||||
Ket
|
||||
^
|
||||
Recurse
|
||||
a
|
||||
$
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^(a?)b(?1)a/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
CBra 1
|
||||
a?
|
||||
Ket
|
||||
b
|
||||
Recurse
|
||||
a
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^(a?)+b(?1)a/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
SCBra 1
|
||||
a?
|
||||
KetRmax
|
||||
b
|
||||
Recurse
|
||||
a
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^(a?)++b(?1)a/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
SCBraPos 1
|
||||
a?
|
||||
KetRpos
|
||||
b
|
||||
Recurse
|
||||
a
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/^(a?)+b/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
^
|
||||
SCBra 1
|
||||
a?
|
||||
KetRmax
|
||||
b
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/(?=a+)a(a+)++b/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Assert
|
||||
a++
|
||||
Ket
|
||||
a
|
||||
CBraPos 1
|
||||
a++
|
||||
KetRpos
|
||||
b
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput2
|
||||
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue