From 2327a5860b3642bfa0831fffcdb5a08ebf8f2aae Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 29 Mar 2015 17:34:04 +0000 Subject: [PATCH] Fix bad compile for possessive quantifier on group containing a subroutine reference. --- ChangeLog | 5 +++++ src/pcre2_compile.c | 23 ++++++++++++----------- testdata/testinput2 | 4 ++++ testdata/testoutput2 | 42 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 11 deletions(-) diff --git a/ChangeLog b/ChangeLog index 619029f..1fa674b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -50,6 +50,11 @@ assertion after (?(. The code was failing to check the character after (?(?< for the ! or = that would indicate a lookbehind assertion. This bug was discovered by the LLVM fuzzer. +13. A pattern such as /X((?2)()*+){2}+/ which has a possessive quantifier with +a fixed maximum following a group that contains a subroutine reference was +incorrectly compiled and could trigger a buffer overflow. This bug was discovered +by the LLVM fuzzer. 
+ Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index b349900..2b04f08 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1331,7 +1331,7 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE); empty_branch = FALSE; do { - if (!empty_branch && could_be_empty_branch(code, endcode, utf, cb, + if (!empty_branch && could_be_empty_branch(code, endcode, utf, cb, recurses)) empty_branch = TRUE; code += GET(code, 1); } @@ -4520,6 +4520,7 @@ for (;; ptr++) { register int i; int len = (int)(code - previous); + size_t base_hwm_offset = save_hwm_offset; PCRE2_UCHAR *bralink = NULL; PCRE2_UCHAR *brazeroptr = NULL; @@ -4668,20 +4669,20 @@ for (;; ptr++) while (cb->hwm > cb->start_workspace + cb->workspace_size - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - save_hwm_offset)) + (this_hwm_offset - base_hwm_offset)) { *errorcodeptr = expand_workspace(cb); if (*errorcodeptr != 0) goto FAILED; } - for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; + for (hc = (PCRE2_UCHAR *)cb->start_workspace + base_hwm_offset; hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset; hc += LINK_SIZE) { PUT(cb->hwm, 0, GET(hc, 0) + len); cb->hwm += LINK_SIZE; } - save_hwm_offset = this_hwm_offset; + base_hwm_offset = this_hwm_offset; code += len; } } @@ -4749,20 +4750,20 @@ for (;; ptr++) while (cb->hwm > cb->start_workspace + cb->workspace_size - WORK_SIZE_SAFETY_MARGIN - - (this_hwm_offset - save_hwm_offset)) + (this_hwm_offset - base_hwm_offset)) { *errorcodeptr = expand_workspace(cb); if (*errorcodeptr != 0) goto FAILED; } - for (hc = (PCRE2_UCHAR *)cb->start_workspace + save_hwm_offset; + for (hc = (PCRE2_UCHAR *)cb->start_workspace + base_hwm_offset; hc < (PCRE2_UCHAR *)cb->start_workspace + this_hwm_offset; hc += LINK_SIZE) { PUT(cb->hwm, 0, GET(hc, 0) + len + ((i != 0)? 
2+LINK_SIZE : 1)); cb->hwm += LINK_SIZE; } - save_hwm_offset = this_hwm_offset; + base_hwm_offset = this_hwm_offset; code += len; } @@ -5029,9 +5030,9 @@ for (;; ptr++) /* First deal with comments. Putting this code right at the start ensures that comments have no bad side effects. */ - + if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) - { + { ptr += 2; while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; if (*ptr != CHAR_RIGHT_PARENTHESIS) @@ -5163,7 +5164,7 @@ for (;; ptr++) *errorcodeptr = ERR60; /* Verb not recognized */ goto FAILED; } - + /* Initialization for "real" parentheses */ newoptions = options; @@ -5274,7 +5275,7 @@ for (;; ptr++) tempptr[2] == CHAR_EXCLAMATION_MARK || (tempptr[2] == CHAR_LESS_THAN_SIGN && (tempptr[3] == CHAR_EQUALS_SIGN || - tempptr[3] == CHAR_EXCLAMATION_MARK)))) + tempptr[3] == CHAR_EXCLAMATION_MARK)))) { cb->iscondassert = TRUE; break; diff --git a/testdata/testinput2 b/testdata/testinput2 index 5c26c1f..ead245a 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4245,4 +4245,8 @@ a random value. 
/Ix "(?(?.*!.*)?)" +"X((?2)()*+){2}+"B + +"X((?2)()*+){2}"B + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 1c6829f..fda5412 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14209,4 +14209,46 @@ Failed: error -52: nested recursion at the same subject position "(?(?.*!.*)?)" Failed: error 128 at offset 3: assertion expected after (?( or (?(?C) +"X((?2)()*+){2}+"B +------------------------------------------------------------------ + Bra + X + Once + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + Ket + End +------------------------------------------------------------------ + +"X((?2)()*+){2}"B +------------------------------------------------------------------ + Bra + X + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + CBra 1 + Recurse + Braposzero + SCBraPos 2 + KetRpos + Ket + Ket + End +------------------------------------------------------------------ + # End of testinput2