diff --git a/ChangeLog b/ChangeLog index 6e04931..7a4c838 100644 --- a/ChangeLog +++ b/ChangeLog @@ -41,6 +41,10 @@ the LLVM fuzzer. other kinds of group caused stack overflow at compile time. This bug was discovered by the LLVM fuzzer. +11. A pattern such as /(?1)(?#?'){8}(a)/ which had a parenthesized comment +between a subroutine call and its quantifier was incorrectly compiled, leading +to buffer overflow or other errors. This bug was discovered by the LLVM fuzzer. + Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 6937b23..92c204d 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -5025,15 +5025,25 @@ for (;; ptr++) an offset rather than an absolute address. */ case CHAR_LEFT_PARENTHESIS: - newoptions = options; - skipbytes = 0; - bravalue = OP_CBRA; - save_hwm_offset = cb->hwm - cb->start_workspace; - reset_bracount = FALSE; - - /* First deal with various "verbs" that can be introduced by '*'. */ - ptr++; + + /* First deal with comments. Putting this code right at the start ensures + that comments have no bad side effects. */ + + if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN) + { + ptr += 2; + while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; + if (*ptr != CHAR_RIGHT_PARENTHESIS) + { + *errorcodeptr = ERR18; + goto FAILED; + } + continue; + } + + /* Now deal with various "verbs" that can be introduced by '*'. */ + if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0)))) { @@ -5153,11 +5163,19 @@ for (;; ptr++) *errorcodeptr = ERR60; /* Verb not recognized */ goto FAILED; } + + /* Initialization for "real" parentheses */ + + newoptions = options; + skipbytes = 0; + bravalue = OP_CBRA; + save_hwm_offset = cb->hwm - cb->start_workspace; + reset_bracount = FALSE; /* Deal with the extended parentheses; all are introduced by '?', and the appearance of any of them means that this is not a capturing group. */ - else if (*ptr == CHAR_QUESTION_MARK) + if (*ptr == CHAR_QUESTION_MARK) { int i, set, unset, namelen; int *optset; @@ -5166,17 +5184,6 @@ for (;; ptr++) switch (*(++ptr)) { - case CHAR_NUMBER_SIGN: /* Comment; skip to ket */ - ptr++; - while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; - if (*ptr != CHAR_RIGHT_PARENTHESIS) - { - *errorcodeptr = ERR18; - goto FAILED; - } - continue; - - /* ------------------------------------------------------------ */ case CHAR_VERTICAL_LINE: /* Reset capture count for each branch */ reset_bracount = TRUE; @@ -5188,7 +5195,6 @@ for (;; ptr++) ptr++; break; - /* ------------------------------------------------------------ */ case CHAR_LEFT_PARENTHESIS: bravalue = OP_COND; /* Conditional group */ diff --git a/testdata/testinput1 b/testdata/testinput1 index c7376fe..e5a0d0f 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5712,4 +5712,7 @@ name)/mark "Z*(|d*){216}" +"(?1)(?#?'){8}(a)" + baaaaaaaaac + # End of testinput1 diff --git a/testdata/testinput8 b/testdata/testinput8 index d1d62fe..88928b9 100644 --- a/testdata/testinput8 +++ b/testdata/testinput8 @@ -140,4 +140,6 @@ /((?+1)(\1))/ +"(?1)(?#?'){2}(a)" + # End of testinput8 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index e89aee6..bd5c222 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9422,4 +9422,9 @@ No match "Z*(|d*){216}" +"(?1)(?#?'){8}(a)" + baaaaaaaaac + 0: aaaaaaaaa + 1: a + # End of testinput1 diff --git a/testdata/testoutput8-16 b/testdata/testoutput8-16 index 354f2a4..a4ea173 100644 --- a/testdata/testoutput8-16 +++ b/testdata/testoutput8-16 @@ -754,4 +754,20 @@ Memory allocation (code space): 14 22 End ------------------------------------------------------------------ +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 21 Bra + 2 4 Once + 4 14 Recurse + 6 4 Ket + 8 4 Once + 10 14 Recurse + 12 4 Ket + 14 5 CBra 1 + 17 a + 19 5 Ket + 21 21 Ket + 23 End +------------------------------------------------------------------ + # End of testinput8 diff --git a/testdata/testoutput8-32 b/testdata/testoutput8-32 index b4ca6d2..579e1b1 100644 --- a/testdata/testoutput8-32 +++ b/testdata/testoutput8-32 @@ -754,4 +754,20 @@ Memory allocation (code space): 28 22 End ------------------------------------------------------------------ +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 21 Bra + 2 4 Once + 4 14 Recurse + 6 4 Ket + 8 4 Once + 10 14 Recurse + 12 4 Ket + 14 5 CBra 1 + 17 a + 19 5 Ket + 21 21 Ket + 23 End +------------------------------------------------------------------ + # End of testinput8 diff --git a/testdata/testoutput8-8 b/testdata/testoutput8-8 index 6bdbd98..7afa8d8 100644 --- a/testdata/testoutput8-8 +++ b/testdata/testoutput8-8 @@ -754,4 +754,20 @@ Memory allocation (code space): 10 34 End ------------------------------------------------------------------ +"(?1)(?#?'){2}(a)" +------------------------------------------------------------------ + 0 31 Bra + 3 6 Once + 6 21 Recurse + 9 6 Ket + 12 6 Once + 15 21 Recurse + 18 6 Ket + 21 7 CBra 1 + 26 a + 28 7 Ket + 31 31 Ket + 34 End +------------------------------------------------------------------ + # End of testinput8