Fix lookbehind within lookahead within lookbehind misbehaviour bug.

This commit is contained in:
Philip.Hazel 2019-07-16 15:06:21 +00:00
parent 66811c6c73
commit 046c5cd21c
6 changed files with 76 additions and 29 deletions

View File

@ -90,6 +90,11 @@ Clusterfuzz issue 15636.
18. Implement non-atomic positive lookaround assertions. 18. Implement non-atomic positive lookaround assertions.
19. If a lookbehind contained a lookahead that contained another lookbehind
within it, the nested lookbehind was not correctly processed. For example, if
/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching
"b".
Version 10.33 16-April-2019 Version 10.33 16-April-2019
--------------------------- ---------------------------

View File

@ -135,6 +135,8 @@ static BOOL
set_lookbehind_lengths(uint32_t **, int *, int *, int *, set_lookbehind_lengths(uint32_t **, int *, int *, int *,
parsed_recurse_check *, compile_block *); parsed_recurse_check *, compile_block *);
static int
check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
/************************************************* /*************************************************
@ -9108,16 +9110,16 @@ for (;; pptr++)
} }
break; break;
/* Lookaheads can be ignored, but we must start the skip inside the group /* Lookaheads do not contribute to the length of this branch, but they may
so that it isn't treated as a group within the branch. */ contain lookbehinds within them whose lengths need to be set. */
case META_LOOKAHEAD: case META_LOOKAHEAD:
case META_LOOKAHEADNOT: case META_LOOKAHEADNOT:
case META_LOOKAHEAD_NA: case META_LOOKAHEAD_NA:
pptr = parsed_skip(pptr + 1, PSKIP_KET); *errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
if (pptr == NULL) goto PARSED_SKIP_FAILED; if (*errcodeptr != 0) return -1;
/* Also ignore any qualifiers that follow a lookahead assertion. */ /* Ignore any qualifiers that follow a lookahead assertion. */
switch (pptr[1]) switch (pptr[1])
{ {
@ -9454,21 +9456,29 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
the error offset is marked unset. This enables the functions above not to the error offset is marked unset. This enables the functions above not to
override settings from deeper nestings. override settings from deeper nestings.
Arguments cb points to the compile block This function is called recursively from get_branchlength() for lookaheads in
order to process any lookbehinds that they may contain. It stops when it hits a
non-nested closing parenthesis in this case, returning a pointer to it.
Arguments
pptr points to where to start (start of pattern or start of lookahead)
retptr if not NULL, return the ket pointer here
cb points to the compile block
Returns: 0 on success, or an errorcode (cb->erroroffset will be set) Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/ */
static int static int
check_lookbehinds(compile_block *cb) check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
{ {
uint32_t *pptr;
int max; int max;
int errorcode = 0; int errorcode = 0;
int loopcount = 0; int loopcount = 0;
int nestlevel = 0;
cb->erroroffset = PCRE2_UNSET; cb->erroroffset = PCRE2_UNSET;
for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++) for (; *pptr != META_END; pptr++)
{ {
if (*pptr < META_END) continue; /* Literal */ if (*pptr < META_END) continue; /* Literal */
@ -9482,14 +9492,31 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
pptr += 1; pptr += 1;
break; break;
case META_KET:
if (--nestlevel < 0)
{
if (retptr != NULL) *retptr = pptr;
return 0;
}
break;
case META_ATOMIC:
case META_CAPTURE:
case META_COND_ASSERT:
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
case META_LOOKAHEAD_NA:
case META_NOCAPTURE:
case META_SCRIPT_RUN:
nestlevel++;
break;
case META_ACCEPT: case META_ACCEPT:
case META_ALT: case META_ALT:
case META_ASTERISK: case META_ASTERISK:
case META_ASTERISK_PLUS: case META_ASTERISK_PLUS:
case META_ASTERISK_QUERY: case META_ASTERISK_QUERY:
case META_ATOMIC:
case META_BACKREF: case META_BACKREF:
case META_CAPTURE:
case META_CIRCUMFLEX: case META_CIRCUMFLEX:
case META_CLASS: case META_CLASS:
case META_CLASS_EMPTY: case META_CLASS_EMPTY:
@ -9497,15 +9524,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_CLASS_END: case META_CLASS_END:
case META_CLASS_NOT: case META_CLASS_NOT:
case META_COMMIT: case META_COMMIT:
case META_COND_ASSERT:
case META_DOLLAR: case META_DOLLAR:
case META_DOT: case META_DOT:
case META_FAIL: case META_FAIL:
case META_KET:
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
case META_LOOKAHEAD_NA:
case META_NOCAPTURE:
case META_PLUS: case META_PLUS:
case META_PLUS_PLUS: case META_PLUS_PLUS:
case META_PLUS_QUERY: case META_PLUS_QUERY:
@ -9515,7 +9536,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_QUERY_QUERY: case META_QUERY_QUERY:
case META_RANGE_ESCAPED: case META_RANGE_ESCAPED:
case META_RANGE_LITERAL: case META_RANGE_LITERAL:
case META_SCRIPT_RUN:
case META_SKIP: case META_SKIP:
case META_THEN: case META_THEN:
break; break;
@ -10021,7 +10041,7 @@ lengths. */
if (has_lookbehind) if (has_lookbehind)
{ {
errorcode = check_lookbehinds(&cb); errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
if (errorcode != 0) goto HAD_CB_ERROR; if (errorcode != 0) goto HAD_CB_ERROR;
} }

6
testdata/testinput1 vendored
View File

@ -6377,4 +6377,10 @@ ef) x/x,mark
/(?<=a(*SKIP)x)|d/ /(?<=a(*SKIP)x)|d/
abcd abcd
/(?<=(?=.(?<=x)))/aftertext
abx
/(?<=(?=(?<=a)))b/
ab
# End of testinput1 # End of testinput1

3
testdata/testinput2 vendored
View File

@ -5690,4 +5690,7 @@ a)"xI
# ---- # ----
/(?<=(?=.(?<=x)))/
ab\=ph
# End of testinput2 # End of testinput2

View File

@ -10097,4 +10097,13 @@ No match
abcd abcd
0: d 0: d
/(?<=(?=.(?<=x)))/aftertext
abx
0:
0+ x
/(?<=(?=(?<=a)))b/
ab
0: b
# End of testinput1 # End of testinput1

View File

@ -17185,6 +17185,10 @@ Subject length lower bound = 1
# ---- # ----
/(?<=(?=.(?<=x)))/
ab\=ph
No match
# End of testinput2 # End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data