Fix stack overflow instead of error diagnosis for mutual recursion within a lookbehind assertion.
This commit is contained in:
Philip.Hazel 2015-04-01 15:37:54 +00:00
parent 9300cb6418
commit ade67323e7
4 changed files with 38 additions and 14 deletions

View File

@ -63,6 +63,10 @@ This bug was discovered by the LLVM fuzzer.
it should have been signed. Some other "int" variables, having been checked, it should have been signed. Some other "int" variables, having been checked,
have either been changed to uint32_t or commented as "must be signed". have either been changed to uint32_t or commented as "must be signed".
16. A mutual recursion within a lookbehind assertion such as (?<=((?2))((?1)))
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
Version 10.10 06-March-2015 Version 10.10 06-March-2015
--------------------------- ---------------------------

View File

@ -75,8 +75,8 @@ static int
const uint32_t *, unsigned int); const uint32_t *, unsigned int);
static BOOL static BOOL
compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL, compile_regex(uint32_t, PCRE2_UCHAR **, PCRE2_SPTR *, int *, BOOL, BOOL,
uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *, uint32_t, int, uint32_t *, int32_t *, uint32_t *, int32_t *,
branch_chain *, compile_block *, size_t *); branch_chain *, compile_block *, size_t *);
@ -677,6 +677,15 @@ static const uint8_t opcode_possessify[] = {
}; };
/* Structure for checking for mutual recursion when scanning compiled code.
Each entry records one group whose recursion is currently being followed.
Entries are linked through prev, so a scan can walk back over every group
already entered and give up if it meets the same group a second time. */
typedef struct recurse_check {
struct recurse_check *prev;   /* Previous entry in the chain; NULL ends it */
PCRE2_SPTR group;             /* Start of the group being recursed into */
} recurse_check;
/************************************************* /*************************************************
* Free compiled code * * Free compiled code *
*************************************************/ *************************************************/
@ -785,6 +794,7 @@ Arguments:
utf TRUE in UTF mode utf TRUE in UTF mode
atend TRUE if called when the pattern is complete atend TRUE if called when the pattern is complete
cb the "compile data" structure cb the "compile data" structure
recurses chain of recurse_check to catch mutual recursion
Returns: the fixed length, Returns: the fixed length,
or -1 if there is no fixed length, or -1 if there is no fixed length,
@ -794,10 +804,11 @@ Returns: the fixed length,
*/ */
static int static int
find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb) find_fixedlength(PCRE2_UCHAR *code, BOOL utf, BOOL atend, compile_block *cb,
recurse_check *recurses)
{ {
int length = -1; int length = -1;
recurse_check this_recurse;
register int branchlength = 0; register int branchlength = 0;
register PCRE2_UCHAR *cc = code + 1 + LINK_SIZE; register PCRE2_UCHAR *cc = code + 1 + LINK_SIZE;
@ -822,7 +833,8 @@ for (;;)
case OP_ONCE: case OP_ONCE:
case OP_ONCE_NC: case OP_ONCE_NC:
case OP_COND: case OP_COND:
d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cb); d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cb,
recurses);
if (d < 0) return d; if (d < 0) return d;
branchlength += d; branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT); do cc += GET(cc, 1); while (*cc == OP_ALT);
@ -853,10 +865,18 @@ for (;;)
case OP_RECURSE: case OP_RECURSE:
if (!atend) return -3; if (!atend) return -3;
cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */ cs = ce = (PCRE2_UCHAR *)cb->start_code + GET(cc, 1); /* Start subpattern */
do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */
if (cc > cs && cc < ce) return -1; /* Recursion */ if (cc > cs && cc < ce) return -1; /* Recursion */
d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cb); else /* Check for mutual recursion */
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL) return -1; /* Mutual recursion */
}
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cb, &this_recurse);
if (d < 0) return d; if (d < 0) return d;
branchlength += d; branchlength += d;
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
@ -1196,11 +1216,6 @@ Arguments:
Returns: TRUE if what is matched could be empty Returns: TRUE if what is matched could be empty
*/ */
typedef struct recurse_check {
struct recurse_check *prev;
PCRE2_SPTR group;
} recurse_check;
static BOOL static BOOL
could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf, could_be_empty_branch(PCRE2_SPTR code, PCRE2_SPTR endcode, BOOL utf,
compile_block *cb, recurse_check *recurses) compile_block *cb, recurse_check *recurses)
@ -7037,7 +7052,7 @@ for (;;)
int fixed_length; int fixed_length;
*code = OP_END; *code = OP_END;
fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0, fixed_length = find_fixedlength(last_branch, (options & PCRE2_UTF) != 0,
FALSE, cb); FALSE, cb, NULL);
if (fixed_length == -3) if (fixed_length == -3)
{ {
cb->check_lookbehind = TRUE; cb->check_lookbehind = TRUE;
@ -8075,7 +8090,7 @@ if (errorcode == 0 && cb.check_lookbehind)
PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); PCRE2_UCHAR *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
int end_op = *be; int end_op = *be;
*be = OP_END; *be = OP_END;
fixed_length = find_fixedlength(cc, utf, TRUE, &cb); fixed_length = find_fixedlength(cc, utf, TRUE, &cb, NULL);
*be = end_op; *be = end_op;
if (fixed_length < 0) if (fixed_length < 0)
{ {

2
testdata/testinput2 vendored
View File

@ -4253,4 +4253,6 @@ a random value. /Ix
/(?<=\bABQ(3(?+7)))/ /(?<=\bABQ(3(?+7)))/
";(?<=()((?3))((?2)))"
# End of testinput2 # End of testinput2

View File

@ -14257,4 +14257,7 @@ Failed: error 115 at offset 15: reference to non-existent subpattern
/(?<=\bABQ(3(?+7)))/ /(?<=\bABQ(3(?+7)))/
Failed: error 115 at offset 15: reference to non-existent subpattern Failed: error 115 at offset 15: reference to non-existent subpattern
";(?<=()((?3))((?2)))"
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
# End of testinput2 # End of testinput2