diff --git a/ChangeLog b/ChangeLog index bd8862e..c90011b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -338,6 +338,10 @@ modules. * LC_ALL was displayed as "LCC_ALL"; * numbers 11, 12 & 13 should end in "th"; * use double quotes in usage message. + +53. When autopossessifying, skip empty branches without recursion, to reduce +stack usage for the benefit of clang with -fsanitize=address, which uses huge +stack frames. Example pattern: /X?(R||){3335}/. Fixes oss-fuzz issue 553. Version 10.22 29-July-2016 diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index ecc34fb..64ec6df 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -589,6 +589,7 @@ for(;;) case OP_ASSERTBACK_NOT: case OP_ONCE: case OP_ONCE_NC: + /* Atomic sub-patterns and assertions can always auto-possessify their last iterator. However, if the group was entered as a result of checking a previous iterator, this is not possible. */ @@ -606,6 +607,9 @@ for(;;) next_code = code + GET(code, 1); code += PRIV(OP_lengths)[c]; + /* Check each branch. We have to recurse a level for all but the last + branch. */ + while (*next_code == OP_ALT) { if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit)) @@ -1067,7 +1071,7 @@ PCRE2_UCHAR c; PCRE2_SPTR end; PCRE2_UCHAR *repeat_opcode; uint32_t list[8]; -int rec_limit = 10000; +int rec_limit = 1000; /* Was 10,000 but clang+ASAN uses a lot of stack. */ for (;;) { diff --git a/testdata/testinput1 b/testdata/testinput1 index 08a9bcc..2de7728 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5826,4 +5826,9 @@ ef) x/x,mark /[s[:digit:]\Q\E-H]+/ s09-H +/a+(?:|b)a/ + aaaa + +/X?(R||){3335}/ + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index d07b657..e55a63b 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9305,4 +9305,10 @@ No match s09-H 0: s09-H +/a+(?:|b)a/ + aaaa + 0: aaaa + +/X?(R||){3335}/ + # End of testinput1