From 5cd731f314e0d4e44a151fa786b678539413fbc6 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Thu, 16 Jul 2015 17:47:20 +0000 Subject: [PATCH] Fix crash due to incorrect pre-pass handling of callouts with string arguments, and tighten up the callout handling. --- ChangeLog | 7 ++++++ src/pcre2_compile.c | 57 ++++++++++++++++++++++++++++++++++++++++++-- testdata/testinput2 | 4 ++++ testdata/testoutput2 | 6 +++++ 4 files changed, 72 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index c13a786..6b3177e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,6 +30,13 @@ discovered by Karl Skomski with the LLVM fuzzer. own source module to avoid a circular dependency between src/pcre2_compile.c and src/pcre2_study.c +8. A callout with a string argument containing an opening square bracket, for +example /(?C$[$)(?<]/, was incorrectly processed and could provoke a buffer +overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer. + +9. The handling of callouts during the pre-pass for named group identification +has been tightened up. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 84c1e65..7e0d4fc 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2988,6 +2988,7 @@ static uint32_t scan_for_captures(PCRE2_SPTR *ptrptr, uint32_t options, compile_block *cb) { uint32_t c; +uint32_t delimiter; uint32_t nest_depth = 0; uint32_t set, unset, *optset; int errorcode = 0; @@ -2999,6 +3000,7 @@ BOOL isdupname; BOOL utf = (options & PCRE2_UTF) != 0; BOOL negate_class; PCRE2_SPTR name; +PCRE2_SPTR start; PCRE2_SPTR ptr = *ptrptr; named_group *ng; nest_save *top_nest = NULL; @@ -3176,7 +3178,6 @@ for (; ptr < cb->end_pattern; ptr++) default: ptr += 2; if (ptr[0] == CHAR_R || /* (?R) */ - ptr[0] == CHAR_C || /* (?C) */ IS_DIGIT(ptr[0]) || /* (?n) */ (ptr[0] == CHAR_MINUS && IS_DIGIT(ptr[1]))) break; /* (?-n) */ @@ -3252,7 +3253,57 @@ for (; ptr < cb->end_pattern; ptr++) else top_nest->nest_depth = nest_depth; } break; + + /* Skip over a numerical or string argument for a callout. */ + + case CHAR_C: + ptr += 2; + if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break; + if (IS_DIGIT(ptr[1])) + { + while (IS_DIGIT(ptr[1])) ptr++; + if (ptr[1] != CHAR_RIGHT_PARENTHESIS) + { + errorcode = ERR39; + ptr++; + goto FAILED; + } + break; + } + /* Handle a string argument */ + + ptr++; + delimiter = 0; + for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) + { + if (*ptr == PRIV(callout_start_delims)[i]) + { + delimiter = PRIV(callout_end_delims)[i]; + break; + } + } + + if (delimiter == 0) + { + errorcode = ERR82; + goto FAILED; + } + + start = ptr; + do + { + if (++ptr >= cb->end_pattern) + { + errorcode = ERR81; + ptr = start; /* To give a more useful message */ + goto FAILED; + } + if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; + } + while (ptr[0] != delimiter); + break; + case CHAR_NUMBER_SIGN: ptr += 3; while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; @@ -6062,7 +6113,9 @@ for (;; ptr++) } /* During the pre-compile phase, we parse the string and update the - length. There is no need to generate any code. */ + length. There is no need to generate any code. (In fact, the string + has already been parsed in the pre-pass that looks for named + parentheses, but it does no harm to leave this code in.) */ if (lengthptr != NULL) /* Only check the string */ { diff --git a/testdata/testinput2 b/testdata/testinput2 index 463943a..8ab3043 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4331,4 +4331,8 @@ a random value. /Ix /((?x)(*:0))#(?'/ +/(?C$[$)(?<]/ + +/(?C$)$)(?<]/ + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 477ea16..c446aaf 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14476,4 +14476,10 @@ Failed: error 124 at offset 10: unrecognized character after (?< /((?x)(*:0))#(?'/ Failed: error 124 at offset 15: unrecognized character after (?< +/(?C$[$)(?<]/ +Failed: error 124 at offset 10: unrecognized character after (?< + +/(?C$)$)(?<]/ +Failed: error 124 at offset 10: unrecognized character after (?< + # End of testinput2