From 6c8945b8d86f3aa4ab028f80be1e874ede9e37dc Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 28 Jan 2015 11:28:53 +0000 Subject: [PATCH] Fix mutual back reference recursion segfault. --- ChangeLog | 4 ++++ src/pcre2_study.c | 20 +++++++++++--------- testdata/testinput1 | 3 +++ testdata/testoutput1 | 6 ++++++ 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9f704de..308a81f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -40,6 +40,10 @@ for being set in a conditional) that occurred in a part of the pattern where PCRE2_DUPNAMES was not set caused the amount of memory needed for the pattern to be incorrectly calculated, leading to overwriting. +8. A mutually recursive set of back references such as (\2)(\1) caused a +segfault at compile time (while trying to find the minimum matching length). +The infinite loop is now broken (with the minimum length unset, that is, zero). + Version 10.00 05-January-2015 ----------------------------- diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 23a6175..16e61d7 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -73,11 +73,13 @@ Arguments: re compiled pattern block code pointer to start of group (the bracket) startcode pointer to start of the whole pattern's code - recurse_depth RECURSE depth + recurse_depth RECURSE and/or backreference depth utf UTF flag Returns: the minimum length - -1 if \C in UTF-8 mode or (*ACCEPT) was encountered + -1 \C in UTF-8 mode + or (*ACCEPT) + or too much back reference recursion -2 internal error (missing capturing bracket) -3 internal error (opcode not listed) */ @@ -375,7 +377,7 @@ for (;;) the minimum length for the subpattern. A recursion, however, causes an a flag to be set that causes the length of this branch to be ignored. The logic is that a recursion can only make sense if there is another - alternation that stops the recursing. That will provide the minimum length + alternative that stops the recursing. That will provide the minimum length (when no recursion happens). A backreference within the group that it is referencing behaves in the same way. @@ -398,7 +400,7 @@ for (;;) ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0)); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if (cc > cs && cc < ce) + if ((cc > cs && cc < ce) || recurse_depth > 10) { d = 0; had_recurse = TRUE; @@ -406,7 +408,7 @@ for (;;) } else { - int dd = find_minlength(re, cs, startcode, recurse_depth, utf); + int dd = find_minlength(re, cs, startcode, recurse_depth + 1, utf); if (dd < d) d = dd; } slot += re->name_entry_size; @@ -423,14 +425,14 @@ for (;;) ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); if (cs == NULL) return -2; do ce += GET(ce, 1); while (*ce == OP_ALT); - if (cc > cs && cc < ce) + if ((cc > cs && cc < ce) || recurse_depth > 10) { d = 0; had_recurse = TRUE; } else { - d = find_minlength(re, cs, startcode, recurse_depth, utf); + d = find_minlength(re, cs, startcode, recurse_depth + 1, utf); } } else d = 0; @@ -1421,8 +1423,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && switch(min = find_minlength(re, code, code, 0, utf)) { - case -1: /* \C in UTF mode or (*ACCEPT) was encountered */ - break; + case -1: /* \C in UTF mode or (*ACCEPT) or too much backref recursion */ + break; /* Leave minlength unchanged (will be zero) */ case -2: return 2; /* missing capturing bracket */ diff --git a/testdata/testinput1 b/testdata/testinput1 index fa6c203..2a9e210 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5705,4 +5705,7 @@ name)/mark /(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ 1234abcd +/(\2|a)(\1)/ + aaa + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 6bd2e9e..4215beb 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9412,4 +9412,10 @@ No match 4: 5: +/(\2|a)(\1)/ + aaa + 0: aa + 1: a + 2: a + # End of testinput1