Fix mutual back reference recursion segfault.

This commit is contained in:
Philip.Hazel 2015-01-28 11:28:53 +00:00
parent 911b800984
commit 6c8945b8d8
4 changed files with 24 additions and 9 deletions

View File

@ -40,6 +40,10 @@ for being set in a conditional) that occurred in a part of the pattern where
PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern
to be incorrectly calculated, leading to overwriting. to be incorrectly calculated, leading to overwriting.
8. A mutually recursive set of back references such as (\2)(\1) caused a
segfault at compile time (while trying to find the minimum matching length).
The infinite loop is now broken (with the minimum length unset, that is, zero).
Version 10.00 05-January-2015 Version 10.00 05-January-2015
----------------------------- -----------------------------

View File

@ -73,11 +73,13 @@ Arguments:
re compiled pattern block re compiled pattern block
code pointer to start of group (the bracket) code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern's code startcode pointer to start of the whole pattern's code
recurse_depth RECURSE depth recurse_depth RECURSE and/or backreference depth
utf UTF flag utf UTF flag
Returns: the minimum length Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered -1 \C in UTF-8 mode
or (*ACCEPT)
or too much back reference recursion
-2 internal error (missing capturing bracket) -2 internal error (missing capturing bracket)
-3 internal error (opcode not listed) -3 internal error (opcode not listed)
*/ */
@ -375,7 +377,7 @@ for (;;)
the minimum length for the subpattern. A recursion, however, causes the minimum length for the subpattern. A recursion, however, causes
a flag to be set that causes the length of this branch to be ignored. The a flag to be set that causes the length of this branch to be ignored. The
logic is that a recursion can only make sense if there is another logic is that a recursion can only make sense if there is another
alternation that stops the recursing. That will provide the minimum length alternative that stops the recursing. That will provide the minimum length
(when no recursion happens). A backreference within the group that it is (when no recursion happens). A backreference within the group that it is
referencing behaves in the same way. referencing behaves in the same way.
@ -398,7 +400,7 @@ for (;;)
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
if (cs == NULL) return -2; if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT); do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce) if ((cc > cs && cc < ce) || recurse_depth > 10)
{ {
d = 0; d = 0;
had_recurse = TRUE; had_recurse = TRUE;
@ -406,7 +408,7 @@ for (;;)
} }
else else
{ {
int dd = find_minlength(re, cs, startcode, recurse_depth, utf); int dd = find_minlength(re, cs, startcode, recurse_depth + 1, utf);
if (dd < d) d = dd; if (dd < d) d = dd;
} }
slot += re->name_entry_size; slot += re->name_entry_size;
@ -423,14 +425,14 @@ for (;;)
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
if (cs == NULL) return -2; if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT); do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce) if ((cc > cs && cc < ce) || recurse_depth > 10)
{ {
d = 0; d = 0;
had_recurse = TRUE; had_recurse = TRUE;
} }
else else
{ {
d = find_minlength(re, cs, startcode, recurse_depth, utf); d = find_minlength(re, cs, startcode, recurse_depth + 1, utf);
} }
} }
else d = 0; else d = 0;
@ -1421,8 +1423,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
switch(min = find_minlength(re, code, code, 0, utf)) switch(min = find_minlength(re, code, code, 0, utf))
{ {
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */ case -1: /* \C in UTF mode or (*ACCEPT) or too much backref recursion */
break; break; /* Leave minlength unchanged (will be zero) */
case -2: case -2:
return 2; /* missing capturing bracket */ return 2; /* missing capturing bracket */

3
testdata/testinput1 vendored
View File

@ -5705,4 +5705,7 @@ name)/mark
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ /(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
1234abcd 1234abcd
/(\2|a)(\1)/
aaa
# End of testinput1 # End of testinput1

View File

@ -9412,4 +9412,10 @@ No match
4: <unset> 4: <unset>
5: 5:
/(\2|a)(\1)/
aaa
0: aa
1: a
2: a
# End of testinput1 # End of testinput1