From 36366914f502a466de2349ce53239e1b20d0e586 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 29 Oct 2017 16:58:38 +0000 Subject: [PATCH] Fix oss-fuzz bugs 3852 and 3891 (same bug); mis-closing external captures by *ACCEPT inside assertions. --- ChangeLog | 6 ++++++ src/pcre2_compile.c | 12 ++++++++---- src/pcre2_internal.h | 1 + src/pcre2_intmodedep.h | 4 ++-- testdata/testinput1 | 10 ++++++++++ testdata/testoutput1 | 17 +++++++++++++++++ 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 35c7ec0..a0ad0b2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -39,6 +39,12 @@ processing files with the -r option, and also (some very odd code) truncating path names to 512 characters. There is now a check on the absolute length of full path file names, which may be up to 2047 characters long. +12. When an assertion contained (*ACCEPT) it caused all open capturing groups +to be closed (as for a non-assertion ACCEPT), which was wrong and could lead to +misbehaviour for subsequent references to groups that started outside the +recursion. ACCEPT in an assertion now closes only those groups that were +started within that assertion. Fixes oss-fuzz issues 3852 and 3891. + Version 10.30 14-August-2017 ---------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 3a02756..0b91d14 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2194,7 +2194,7 @@ manage_callouts(PCRE2_SPTR ptr, uint32_t **pcalloutptr, BOOL auto_callout, { uint32_t *previous_callout = *pcalloutptr; -if (previous_callout != NULL) previous_callout[2] = (uint32_t)(ptr - +if (previous_callout != NULL) previous_callout[2] = (uint32_t)(ptr - cb->start_pattern - (PCRE2_SIZE)previous_callout[1]); if (!auto_callout) previous_callout = NULL; else @@ -5599,14 +5599,17 @@ for (;; pptr++) /* ===================================================================*/ /* Deal with (*VERB)s. */ - /* Check for open captures before ACCEPT and convert it to ASSERT_ACCEPT if - in an assertion. In the first pass, just accumulate the length required; + /* Check for open captures before ACCEPT and close those that are within + the same assertion level, also converting ACCEPT to ASSERT_ACCEPT in an + assertion. In the first pass, just accumulate the length required; otherwise hitting (*ACCEPT) inside many nested parentheses can cause workspace overflow. Do not set firstcu after *ACCEPT. */ case META_ACCEPT: cb->had_accept = TRUE; - for (oc = cb->open_caps; oc != NULL; oc = oc->next) + for (oc = cb->open_caps; + oc != NULL && oc->assert_depth >= cb->assert_depth; + oc = oc->next) { if (lengthptr != NULL) { @@ -7483,6 +7486,7 @@ if (*code == OP_CBRA) capitem.number = capnumber; capitem.next = cb->open_caps; capitem.flag = FALSE; + capitem.assert_depth = cb->assert_depth; cb->open_caps = &capitem; } diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index d1cb1c0..4443c41 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -1770,6 +1770,7 @@ typedef struct open_capitem { struct open_capitem *next; /* Chain link */ uint16_t number; /* Capture number */ uint16_t flag; /* Set TRUE if recursive back ref */ + uint16_t assert_depth; /* Assertion depth when opened */ } open_capitem; /* Layout of the UCP type table that translates property names into types and diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 387f65e..ad67060 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -723,6 +723,8 @@ typedef struct compile_block { PCRE2_SIZE erroroffset; /* Offset of error in pattern */ uint16_t names_found; /* Number of entries so far */ uint16_t name_entry_size; /* Size of each entry */ + uint16_t parens_depth; /* Depth of nested parentheses */ + uint16_t assert_depth; /* Depth of nested assertions */ open_capitem *open_caps; /* Chain of open capture items */ named_group *named_groups; /* Points to vector in pre-compile */ uint32_t named_group_list_size; /* Number of entries in the list */ @@ -741,8 +743,6 @@ typedef struct compile_block { uint32_t class_range_end; /* Overall class range end */ PCRE2_UCHAR nl[4]; /* Newline string when fixed length */ int max_lookbehind; /* Maximum lookbehind (characters) */ - int parens_depth; /* Depth of nested parentheses */ - int assert_depth; /* Depth of nested assertions */ int req_varyopt; /* "After variable item" flag for reqbyte */ BOOL had_accept; /* (*ACCEPT) encountered */ BOOL had_pruneorskip; /* (*PRUNE) or (*SKIP) encountered */ diff --git a/testdata/testinput1 b/testdata/testinput1 index 611a88c..756b4af 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6149,4 +6149,14 @@ ef) x/x,mark /[[:digit:]-]+/ 12-24 +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 75ac0ca..7cf647e 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9741,4 +9741,21 @@ No match 12-24 0: 12-24 +/((?<=((*ACCEPT)) )\1?\b) / +\= Expect no match + ((?<=((*ACCEPT)) )\\1?\\b)\x20 +No match + +/((?<=((*ACCEPT))X)\1?Y)\1/ + XYYZ + 0: YY + 1: Y + 2: + +/((?<=((*ACCEPT))X)\1?Y(*ACCEPT))\1/ + XYYZ + 0: Y + 1: Y + 2: + # End of testinput1