From a066d1202210d853d13ce43dc080fb931bcbc045 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 25 Mar 2015 17:01:04 +0000 Subject: [PATCH] Add recursion limit to auto-possessification code. --- ChangeLog | 5 +++++ src/pcre2_auto_possess.c | 18 +++++++++++++----- testdata/testinput1 | 2 ++ testdata/testoutput1 | 2 ++ 4 files changed, 22 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index af537ee..2d42176 100644 --- a/ChangeLog +++ b/ChangeLog @@ -25,6 +25,11 @@ when this assertion was used as a condition, for example (?(?!)a|b). In pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect error about an unsupported item. +8. For some types of pattern, for example /Z*(|d*){216}/, the auto- +possessification code could take exponential time to complete. A recursion +depth limit of 10000 has been imposed to limit the resources used by this +optimization. This infelicity was discovered by the LLVM fuzzer. + Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_auto_possess.c b/src/pcre2_auto_possess.c index e25ec43..f2ada6d 100644 --- a/src/pcre2_auto_possess.c +++ b/src/pcre2_auto_possess.c @@ -561,13 +561,15 @@ Arguments: utf TRUE in UTF mode cb compile data block base_list the data list of the base opcode + base_end the end of the data list + rec_limit points to recursion depth counter Returns: TRUE if the auto-possessification is possible */ static BOOL compare_opcodes(PCRE2_SPTR code, BOOL utf, const compile_block *cb, - const uint32_t *base_list, PCRE2_SPTR base_end) + const uint32_t *base_list, PCRE2_SPTR base_end, int *rec_limit) { PCRE2_UCHAR c; uint32_t list[8]; @@ -584,6 +586,8 @@ uint32_t chr; BOOL accepted, invert_bits; BOOL entered_a_group = FALSE; +if (--(*rec_limit) <= 0) return FALSE; /* Recursion has gone too deep */ + /* Note: the base_list[1] contains whether the current opcode has a greedy (represented by a non-zero value) quantifier. This is a different from other character type lists, which store here that the character iterator @@ -660,7 +664,8 @@ for(;;) while (*next_code == OP_ALT) { - if (!compare_opcodes(code, utf, cb, base_list, base_end)) return FALSE; + if (!compare_opcodes(code, utf, cb, base_list, base_end, rec_limit)) + return FALSE; code = next_code + 1 + LINK_SIZE; next_code += GET(next_code, 1); } @@ -680,7 +685,7 @@ for(;;) /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ next_code += 1 + LINK_SIZE; - if (!compare_opcodes(next_code, utf, cb, base_list, base_end)) + if (!compare_opcodes(next_code, utf, cb, base_list, base_end, rec_limit)) return FALSE; code += PRIV(OP_lengths)[c]; @@ -1116,6 +1121,7 @@ register PCRE2_UCHAR c; PCRE2_SPTR end; PCRE2_UCHAR *repeat_opcode; uint32_t list[8]; +int rec_limit; for (;;) { @@ -1130,7 +1136,8 @@ for (;;) get_chr_property_list(code, utf, cb->fcc, list) : NULL; list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO; - if (end != NULL && compare_opcodes(end, utf, cb, list, end)) + rec_limit = 10000; + if (end != NULL && compare_opcodes(end, utf, cb, list, end, &rec_limit)) { switch(c) { @@ -1186,7 +1193,8 @@ for (;;) list[1] = (c & 1) == 0; - if (compare_opcodes(end, utf, cb, list, end)) + rec_limit = 10000; + if (compare_opcodes(end, utf, cb, list, end, &rec_limit)) { switch (c) { diff --git a/testdata/testinput1 b/testdata/testinput1 index 4256a66..c7376fe 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5710,4 +5710,6 @@ name)/mark /(\2)(\1)/ +"Z*(|d*){216}" + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index f052f1f..e89aee6 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9420,4 +9420,6 @@ No match /(\2)(\1)/ +"Z*(|d*){216}" + # End of testinput1