Fix auto-possessification bug at the end of a capturing group that is called
recursively.
This commit is contained in:
parent
7a5b962509
commit
53a588431c
|
@ -134,6 +134,15 @@ groups, making the ovector larger than this. The number has been increased to
|
||||||
131072, which allows for the maximum number of captures (65535) plus the
|
131072, which allows for the maximum number of captures (65535) plus the
|
||||||
overall match. This fixes oss-fuzz issue 5415.
|
overall match. This fixes oss-fuzz issue 5415.
|
||||||
|
|
||||||
|
31. Auto-possessification at the end of a capturing group was dependent on what
|
||||||
|
follows the group (e.g. /(a+)b/ would auto-possessify the a+) but this caused
|
||||||
|
incorrect behaviour when the group was called recursively from elsewhere in the
|
||||||
|
pattern where something different might follow. This bug is an unforeseen
|
||||||
|
consequence of change #1 for 10.30 - the implementation of backtracking into
|
||||||
|
recursions. Iterators at the ends of capturing groups are no longer considered
|
||||||
|
for auto-possessification if the pattern contains any recursions. Fixes
|
||||||
|
Bugzilla #2232.
|
||||||
|
|
||||||
|
|
||||||
Version 10.30 14-August-2017
|
Version 10.30 14-August-2017
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
|
@ -558,47 +558,73 @@ for(;;)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* At the end of a branch, skip to the end of the group. */
|
||||||
|
|
||||||
if (c == OP_ALT)
|
if (c == OP_ALT)
|
||||||
{
|
{
|
||||||
do code += GET(code, 1); while (*code == OP_ALT);
|
do code += GET(code, 1); while (*code == OP_ALT);
|
||||||
c = *code;
|
c = *code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Inspect the next opcode. */
|
||||||
|
|
||||||
switch(c)
|
switch(c)
|
||||||
{
|
{
|
||||||
case OP_END:
|
/* We can always possessify a greedy iterator at the end of the pattern,
|
||||||
case OP_KETRPOS:
|
which is reached after skipping over the final OP_KET. A non-greedy
|
||||||
/* TRUE only in greedy case. The non-greedy case could be replaced by
|
iterator must never be possessified. */
|
||||||
an OP_EXACT, but it is probably not worth it. (And note that OP_EXACT
|
|
||||||
uses more memory, which we cannot get at this stage.) */
|
|
||||||
|
|
||||||
|
case OP_END:
|
||||||
return base_list[1] != 0;
|
return base_list[1] != 0;
|
||||||
|
|
||||||
|
/* When an iterator is at the end of certain kinds of group we can inspect
|
||||||
|
what follows the group by skipping over the closing ket. Note that this
|
||||||
|
does not apply to OP_KETRMAX or OP_KETRMIN because what follows any given
|
||||||
|
iteration is variable (could be another iteration or could be the next
|
||||||
|
item). As these two opcodes are not listed in the next switch, they will
|
||||||
|
end up as the next code to inspect, and return FALSE by virtue of being
|
||||||
|
unsupported. */
|
||||||
|
|
||||||
case OP_KET:
|
case OP_KET:
|
||||||
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
|
case OP_KETRPOS:
|
||||||
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
|
/* The non-greedy case cannot be converted to a possessive form. */
|
||||||
cannot be converted to a possessive form. */
|
|
||||||
|
|
||||||
if (base_list[1] == 0) return FALSE;
|
if (base_list[1] == 0) return FALSE;
|
||||||
|
|
||||||
|
/* If the bracket is capturing it might be referenced by an OP_RECURSE
|
||||||
|
so its last iterator can never be possessified if the pattern contains
|
||||||
|
recursions. (This could be improved by keeping a list of group numbers that
|
||||||
|
are called by recursion.) */
|
||||||
|
|
||||||
switch(*(code - GET(code, 1)))
|
switch(*(code - GET(code, 1)))
|
||||||
{
|
{
|
||||||
|
case OP_CBRA:
|
||||||
|
case OP_SCBRA:
|
||||||
|
case OP_CBRAPOS:
|
||||||
|
case OP_SCBRAPOS:
|
||||||
|
if (cb->had_recurse) return FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||||
|
last iterator. However, if the group was entered as a result of checking
|
||||||
|
a previous iterator, this is not possible. */
|
||||||
|
|
||||||
case OP_ASSERT:
|
case OP_ASSERT:
|
||||||
case OP_ASSERT_NOT:
|
case OP_ASSERT_NOT:
|
||||||
case OP_ASSERTBACK:
|
case OP_ASSERTBACK:
|
||||||
case OP_ASSERTBACK_NOT:
|
case OP_ASSERTBACK_NOT:
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
|
|
||||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
|
||||||
last iterator. However, if the group was entered as a result of checking
|
|
||||||
a previous iterator, this is not possible. */
|
|
||||||
|
|
||||||
return !entered_a_group;
|
return !entered_a_group;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Skip over the bracket and inspect what comes next. */
|
||||||
|
|
||||||
code += PRIV(OP_lengths)[c];
|
code += PRIV(OP_lengths)[c];
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* Handle cases where the next item is a group. */
|
||||||
|
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_BRA:
|
case OP_BRA:
|
||||||
case OP_CBRA:
|
case OP_CBRA:
|
||||||
|
@ -637,11 +663,15 @@ for(;;)
|
||||||
code += PRIV(OP_lengths)[c];
|
code += PRIV(OP_lengths)[c];
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/* The next opcode does not need special handling; fall through and use it
|
||||||
|
to see if the base can be possessified. */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for a supported opcode, and load its properties. */
|
/* We now have the next appropriate opcode to compare with the base. Check
|
||||||
|
for a supported opcode, and load its properties. */
|
||||||
|
|
||||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||||
if (code == NULL) return FALSE; /* Unsupported */
|
if (code == NULL) return FALSE; /* Unsupported */
|
||||||
|
|
|
@ -6159,4 +6159,34 @@ ef) x/x,mark
|
||||||
/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/
|
/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/
|
||||||
XYYZ
|
XYYZ
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/
|
||||||
|
aa
|
||||||
|
a
|
||||||
|
|
||||||
|
/^(a?)b(?1)a/
|
||||||
|
abaa
|
||||||
|
aba
|
||||||
|
baa
|
||||||
|
ba
|
||||||
|
|
||||||
|
/^(a?)+b(?1)a/
|
||||||
|
abaa
|
||||||
|
aba
|
||||||
|
baa
|
||||||
|
ba
|
||||||
|
|
||||||
|
/^(a?)++b(?1)a/
|
||||||
|
abaa
|
||||||
|
aba
|
||||||
|
baa
|
||||||
|
ba
|
||||||
|
|
||||||
|
/^(a?)+b/
|
||||||
|
b
|
||||||
|
ab
|
||||||
|
aaab
|
||||||
|
|
||||||
|
/(?=a+)a(a+)++b/
|
||||||
|
aab
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -5412,4 +5412,21 @@ a)"xI
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
\na
|
\na
|
||||||
|
|
||||||
|
# These tests are matched in test 1 as they are Perl compatible. Here we are
|
||||||
|
# looking at what does and does not get auto-possessified.
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B
|
||||||
|
|
||||||
|
/^(a?)b(?1)a/B
|
||||||
|
|
||||||
|
/^(a?)+b(?1)a/B
|
||||||
|
|
||||||
|
/^(a?)++b(?1)a/B
|
||||||
|
|
||||||
|
/^(a?)+b/B
|
||||||
|
|
||||||
|
/(?=a+)a(a+)++b/B
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -9758,4 +9758,68 @@ No match
|
||||||
1: Y
|
1: Y
|
||||||
2:
|
2:
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/
|
||||||
|
aa
|
||||||
|
0: aa
|
||||||
|
a
|
||||||
|
0: a
|
||||||
|
|
||||||
|
/^(a?)b(?1)a/
|
||||||
|
abaa
|
||||||
|
0: abaa
|
||||||
|
1: a
|
||||||
|
aba
|
||||||
|
0: aba
|
||||||
|
1: a
|
||||||
|
baa
|
||||||
|
0: baa
|
||||||
|
1:
|
||||||
|
ba
|
||||||
|
0: ba
|
||||||
|
1:
|
||||||
|
|
||||||
|
/^(a?)+b(?1)a/
|
||||||
|
abaa
|
||||||
|
0: abaa
|
||||||
|
1:
|
||||||
|
aba
|
||||||
|
0: aba
|
||||||
|
1:
|
||||||
|
baa
|
||||||
|
0: baa
|
||||||
|
1:
|
||||||
|
ba
|
||||||
|
0: ba
|
||||||
|
1:
|
||||||
|
|
||||||
|
/^(a?)++b(?1)a/
|
||||||
|
abaa
|
||||||
|
0: abaa
|
||||||
|
1:
|
||||||
|
aba
|
||||||
|
0: aba
|
||||||
|
1:
|
||||||
|
baa
|
||||||
|
0: baa
|
||||||
|
1:
|
||||||
|
ba
|
||||||
|
0: ba
|
||||||
|
1:
|
||||||
|
|
||||||
|
/^(a?)+b/
|
||||||
|
b
|
||||||
|
0: b
|
||||||
|
1:
|
||||||
|
ab
|
||||||
|
0: ab
|
||||||
|
1:
|
||||||
|
aaab
|
||||||
|
0: aaab
|
||||||
|
1:
|
||||||
|
|
||||||
|
/(?=a+)a(a+)++b/
|
||||||
|
aab
|
||||||
|
0: aab
|
||||||
|
1: a
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -12701,7 +12701,7 @@ Subject length lower bound = 5
|
||||||
Ket
|
Ket
|
||||||
a
|
a
|
||||||
CBraPos 1
|
CBraPos 1
|
||||||
a++
|
a+
|
||||||
KetRpos
|
KetRpos
|
||||||
a
|
a
|
||||||
Ket
|
Ket
|
||||||
|
@ -16468,6 +16468,113 @@ No match
|
||||||
\na
|
\na
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# These tests are matched in test 1 as they are Perl compatible. Here we are
|
||||||
|
# looking at what does and does not get auto-possessified.
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?))^(?&optional_a)a$/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Cond
|
||||||
|
Cond false
|
||||||
|
CBra 1
|
||||||
|
a?
|
||||||
|
Ket
|
||||||
|
Ket
|
||||||
|
^
|
||||||
|
Recurse
|
||||||
|
a
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?(DEFINE)(?<optional_a>a?)X)^(?&optional_a)a$/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Cond
|
||||||
|
Cond false
|
||||||
|
CBra 1
|
||||||
|
a?
|
||||||
|
Ket
|
||||||
|
X
|
||||||
|
Ket
|
||||||
|
^
|
||||||
|
Recurse
|
||||||
|
a
|
||||||
|
$
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^(a?)b(?1)a/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
^
|
||||||
|
CBra 1
|
||||||
|
a?
|
||||||
|
Ket
|
||||||
|
b
|
||||||
|
Recurse
|
||||||
|
a
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^(a?)+b(?1)a/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
^
|
||||||
|
SCBra 1
|
||||||
|
a?
|
||||||
|
KetRmax
|
||||||
|
b
|
||||||
|
Recurse
|
||||||
|
a
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^(a?)++b(?1)a/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
^
|
||||||
|
SCBraPos 1
|
||||||
|
a?
|
||||||
|
KetRpos
|
||||||
|
b
|
||||||
|
Recurse
|
||||||
|
a
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/^(a?)+b/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
^
|
||||||
|
SCBra 1
|
||||||
|
a?
|
||||||
|
KetRmax
|
||||||
|
b
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/(?=a+)a(a+)++b/B
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Assert
|
||||||
|
a++
|
||||||
|
Ket
|
||||||
|
a
|
||||||
|
CBraPos 1
|
||||||
|
a++
|
||||||
|
KetRpos
|
||||||
|
b
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -65: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue