Fix very slow find_minlength when mutual recursion is present.
This commit is contained in:
parent
f123833bdb
commit
3e1748390b
|
@ -80,6 +80,11 @@ reference to random memory and/or a segfault. There were also some other cases
|
|||
where backtracking after \C could crash. This set of bugs was discovered by the
|
||||
LLVM fuzzer.
|
||||
|
||||
20. The function for finding the minimum length of a matching string could take
|
||||
a very long time if mutual recursion was present many times in a pattern, for
|
||||
example, /((?2){73}(?2))((?1))/. A better mutual recursion detection method has
|
||||
been implemented. This infelicity was discovered by the LLVM fuzzer.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -677,14 +677,6 @@ static const uint8_t opcode_possessify[] = {
|
|||
};
|
||||
|
||||
|
||||
/* Structure for checking for mutual recursion when scanning compiled code. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev;
|
||||
PCRE2_SPTR group;
|
||||
} recurse_check;
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free compiled code *
|
||||
|
|
|
@ -640,6 +640,13 @@ typedef struct pcre2_real_match_data {
|
|||
|
||||
#ifndef PCRE2_PCRE2TEST
|
||||
|
||||
/* Structure for checking for mutual recursion when scanning compiled code. */
|
||||
|
||||
typedef struct recurse_check {
|
||||
struct recurse_check *prev;
|
||||
PCRE2_SPTR group;
|
||||
} recurse_check;
|
||||
|
||||
/* Structure for maintaining a chain of pointers to the currently incomplete
|
||||
branches, for testing for left recursion while compiling. */
|
||||
|
||||
|
|
|
@ -73,23 +73,23 @@ Arguments:
|
|||
re compiled pattern block
|
||||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern's code
|
||||
recurse_depth RECURSE and/or backreference depth
|
||||
utf UTF flag
|
||||
recurses chain of recurse_check to catch mutual recursion
|
||||
|
||||
Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or too much back reference recursion
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
|
||||
static int
|
||||
find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
|
||||
PCRE2_SPTR startcode, int recurse_depth, BOOL utf)
|
||||
PCRE2_SPTR startcode, BOOL utf, recurse_check *recurses)
|
||||
{
|
||||
int length = -1;
|
||||
BOOL had_recurse = FALSE;
|
||||
recurse_check this_recurse;
|
||||
register int branchlength = 0;
|
||||
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||
|
||||
|
@ -134,7 +134,7 @@ for (;;)
|
|||
case OP_SBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
d = find_minlength(re, cc, startcode, recurse_depth, utf);
|
||||
d = find_minlength(re, cc, startcode, utf, recurses);
|
||||
if (d < 0) return d;
|
||||
branchlength += d;
|
||||
do cc += GET(cc, 1); while (*cc == OP_ALT);
|
||||
|
@ -377,13 +377,12 @@ for (;;)
|
|||
}
|
||||
break;
|
||||
|
||||
/* Backreferences and subroutine calls are treated in the same way: we find
|
||||
the minimum length for the subpattern. A recursion, however, causes
|
||||
a flag to be set that causes the length of this branch to be ignored. The
|
||||
logic is that a recursion can only make sense if there is another
|
||||
alternative that stops the recursing. That will provide the minimum length
|
||||
(when no recursion happens). A backreference within the group that it is
|
||||
referencing behaves in the same way.
|
||||
/* Backreferences and subroutine calls (OP_RECURSE) are treated in the same
|
||||
way: we find the minimum length for the subpattern. A recursion
|
||||
(backreference or subroutine) causes a flag to be set that causes the
|
||||
length of this branch to be ignored. The logic is that a recursion can only
|
||||
make sense if there is another alternative that stops the recursing. That
|
||||
will provide the minimum length (when no recursion happens).
|
||||
|
||||
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
||||
matches an empty string (by default it causes a matching failure), so in
|
||||
|
@ -399,12 +398,15 @@ for (;;)
|
|||
GET2(cc, 1) * re->name_entry_size;
|
||||
|
||||
d = INT_MAX;
|
||||
|
||||
/* Scan all groups with the same name */
|
||||
|
||||
while (count-- > 0)
|
||||
{
|
||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if ((cc > cs && cc < ce) || recurse_depth > 10)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
|
@ -412,9 +414,23 @@ for (;;)
|
|||
}
|
||||
else
|
||||
{
|
||||
int dd = find_minlength(re, cs, startcode, recurse_depth + 1, utf);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
int dd;
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
if (dd < d) d = dd;
|
||||
}
|
||||
}
|
||||
slot += re->name_entry_size;
|
||||
}
|
||||
}
|
||||
|
@ -429,14 +445,26 @@ for (;;)
|
|||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
|
||||
if (cs == NULL) return -2;
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if ((cc > cs && cc < ce) || recurse_depth > 10)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
d = find_minlength(re, cs, startcode, recurse_depth + 1, utf);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
{
|
||||
d = 0;
|
||||
had_recurse = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
d = find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
}
|
||||
}
|
||||
}
|
||||
else d = 0;
|
||||
|
@ -479,17 +507,23 @@ for (;;)
|
|||
branchlength += min * d;
|
||||
break;
|
||||
|
||||
/* We can easily detect direct recursion, but not mutual recursion. This is
|
||||
caught by a recursion depth count. */
|
||||
|
||||
case OP_RECURSE:
|
||||
cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1);
|
||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||
if ((cc > cs && cc < ce) || recurse_depth > 10)
|
||||
if (cc > cs && cc < ce) /* Simple recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
branchlength += find_minlength(re, cs, startcode, recurse_depth + 1, utf);
|
||||
recurse_check *r = recurses;
|
||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||
if (r != NULL) /* Mutual recursion */
|
||||
had_recurse = TRUE;
|
||||
else
|
||||
{
|
||||
this_recurse.prev = recurses;
|
||||
this_recurse.group = cs;
|
||||
branchlength += find_minlength(re, cs, startcode, utf, &this_recurse);
|
||||
}
|
||||
}
|
||||
cc += 1 + LINK_SIZE;
|
||||
break;
|
||||
|
@ -1429,9 +1463,9 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
|
||||
/* Find the minimum length of subject string. */
|
||||
|
||||
switch(min = find_minlength(re, code, code, 0, utf))
|
||||
switch(min = find_minlength(re, code, code, utf, NULL))
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) or too much backref recursion */
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
|
|
|
@ -4263,4 +4263,6 @@ a random value. /Ix
|
|||
/(?<=\Ka)/altglobal,aftertext
|
||||
aaaaa
|
||||
|
||||
/((?2){73}(?2))((?1))/info
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -14288,4 +14288,9 @@ Failed: error 125 at offset 20: lookbehind assertion is not fixed length
|
|||
0: a
|
||||
0+
|
||||
|
||||
/((?2){73}(?2))((?1))/info
|
||||
Capturing subpattern count = 2
|
||||
May match empty string
|
||||
Subject length lower bound = 0
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue