Limit function recursion in pcre2_study to avoid stack overflow issues.

This commit is contained in:
Philip.Hazel 2020-01-27 10:28:19 +00:00
parent 3a6b4948d1
commit b040e2e1cd
2 changed files with 26 additions and 9 deletions

View File

@ -56,6 +56,10 @@ lookbehind, that end was not correctly recognized. Errors such as "lookbehind
assertion is not fixed length" and also "internal error: bad code value in assertion is not fixed length" and also "internal error: bad code value in
parsed_skip()" could result. parsed_skip()" could result.
15. Put a limit of 1000 on recursive calls in pcre2_study() when searching
nested groups for starting code units, in order to avoid stack overflow issues.
If the limit is reached, it just gives up trying for this optimization.
Version 10.34 21-November-2019 Version 10.34 21-November-2019
------------------------------ ------------------------------

View File

@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge New API code Copyright (c) 2016-2020 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */
/* Returns from set_start_bits() */ /* Returns from set_start_bits() */
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
/************************************************* /*************************************************
@ -924,19 +924,24 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
must continue at the outer level to find at least one mandatory code unit. At must continue at the outer level to find at least one mandatory code unit. At
the outermost level, this function fails unless the result is SSB_DONE. the outermost level, this function fails unless the result is SSB_DONE.
We restrict recursion (for nested groups) to 1000 to avoid stack overflow
issues.
Arguments: Arguments:
re points to the compiled regex block re points to the compiled regex block
code points to an expression code points to an expression
utf TRUE if in UTF mode utf TRUE if in UTF mode
depthptr pointer to recurse depth
Returns: SSB_FAIL => Failed to find any starting code units Returns: SSB_FAIL => Failed to find any starting code units
SSB_DONE => Found mandatory starting code units SSB_DONE => Found mandatory starting code units
SSB_CONTINUE => Found optional starting code units SSB_CONTINUE => Found optional starting code units
SSB_UNKNOWN => Hit an unrecognized opcode SSB_UNKNOWN => Hit an unrecognized opcode
SSB_TOODEEP => Recursion is too deep
*/ */
static int static int
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf) set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, int *depthptr)
{ {
uint32_t c; uint32_t c;
int yield = SSB_DONE; int yield = SSB_DONE;
@ -947,6 +952,9 @@ int table_limit = utf? 16:32;
int table_limit = 32; int table_limit = 32;
#endif #endif
*depthptr += 1;
if (*depthptr > 1000) return SSB_TOODEEP;
do do
{ {
BOOL try_next = TRUE; BOOL try_next = TRUE;
@ -1103,13 +1111,17 @@ do
case OP_SCRIPT_RUN: case OP_SCRIPT_RUN:
case OP_ASSERT: case OP_ASSERT:
case OP_ASSERT_NA: case OP_ASSERT_NA:
rc = set_start_bits(re, tcode, utf); rc = set_start_bits(re, tcode, utf, depthptr);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; if (rc == SSB_DONE)
if (rc == SSB_DONE) try_next = FALSE; else {
try_next = FALSE;
}
else if (rc == SSB_CONTINUE)
{ {
do tcode += GET(tcode, 1); while (*tcode == OP_ALT); do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE; tcode += 1 + LINK_SIZE;
} }
else return rc; /* FAIL, UNKNOWN, or TOODEEP */
break; break;
/* If we hit ALT or KET, it means we haven't found anything mandatory in /* If we hit ALT or KET, it means we haven't found anything mandatory in
@ -1155,8 +1167,8 @@ do
case OP_BRAZERO: case OP_BRAZERO:
case OP_BRAMINZERO: case OP_BRAMINZERO:
case OP_BRAPOSZERO: case OP_BRAPOSZERO:
rc = set_start_bits(re, ++tcode, utf); rc = set_start_bits(re, ++tcode, utf, depthptr);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
do tcode += GET(tcode,1); while (*tcode == OP_ALT); do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE; tcode += 1 + LINK_SIZE;
break; break;
@ -1664,7 +1676,8 @@ code units. */
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{ {
int rc = set_start_bits(re, code, utf); int depth = 0;
int rc = set_start_bits(re, code, utf, &depth);
if (rc == SSB_UNKNOWN) return 1; if (rc == SSB_UNKNOWN) return 1;
/* If a list of starting code units was set up, scan the list to see if only /* If a list of starting code units was set up, scan the list to see if only