Fix lookbehind within lookahead within lookbehind misbehaviour bug.
This commit is contained in:
parent
66811c6c73
commit
046c5cd21c
|
@ -90,6 +90,11 @@ Clusterfuzz issue 15636.
|
|||
|
||||
18. Implement non-atomic positive lookaround assertions.
|
||||
|
||||
19. If a lookbehind contained a lookahead that contained another lookbehind
|
||||
within it, the nested lookbehind was not correctly processed. For example, if
|
||||
/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching
|
||||
"b".
|
||||
|
||||
|
||||
Version 10.33 16-April-2019
|
||||
---------------------------
|
||||
|
|
|
@ -135,6 +135,8 @@ static BOOL
|
|||
set_lookbehind_lengths(uint32_t **, int *, int *, int *,
|
||||
parsed_recurse_check *, compile_block *);
|
||||
|
||||
static int
|
||||
check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
|
||||
|
||||
|
||||
/*************************************************
|
||||
|
@ -9108,16 +9110,16 @@ for (;; pptr++)
|
|||
}
|
||||
break;
|
||||
|
||||
/* Lookaheads can be ignored, but we must start the skip inside the group
|
||||
so that it isn't treated as a group within the branch. */
|
||||
/* Lookaheads do not contribute to the length of this branch, but they may
|
||||
contain lookbehinds within them whose lengths need to be set. */
|
||||
|
||||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
case META_LOOKAHEAD_NA:
|
||||
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||
*errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
|
||||
if (*errcodeptr != 0) return -1;
|
||||
|
||||
/* Also ignore any qualifiers that follow a lookahead assertion. */
|
||||
/* Ignore any qualifiers that follow a lookahead assertion. */
|
||||
|
||||
switch (pptr[1])
|
||||
{
|
||||
|
@ -9454,21 +9456,29 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
|
|||
the error offset is marked unset. This enables the functions above not to
|
||||
override settings from deeper nestings.
|
||||
|
||||
Arguments cb points to the compile block
|
||||
This function is called recursively from get_branchlength() for lookaheads in
|
||||
order to process any lookbehinds that they may contain. It stops when it hits a
|
||||
non-nested closing parenthesis in this case, returning a pointer to it.
|
||||
|
||||
Arguments
|
||||
pptr points to where to start (start of pattern or start of lookahead)
|
||||
retptr if not NULL, return the ket pointer here
|
||||
cb points to the compile block
|
||||
|
||||
Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||
*/
|
||||
|
||||
static int
|
||||
check_lookbehinds(compile_block *cb)
|
||||
check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
|
||||
{
|
||||
uint32_t *pptr;
|
||||
int max;
|
||||
int errorcode = 0;
|
||||
int loopcount = 0;
|
||||
int nestlevel = 0;
|
||||
|
||||
cb->erroroffset = PCRE2_UNSET;
|
||||
|
||||
for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||
for (; *pptr != META_END; pptr++)
|
||||
{
|
||||
if (*pptr < META_END) continue; /* Literal */
|
||||
|
||||
|
@ -9482,14 +9492,31 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|||
pptr += 1;
|
||||
break;
|
||||
|
||||
case META_KET:
|
||||
if (--nestlevel < 0)
|
||||
{
|
||||
if (retptr != NULL) *retptr = pptr;
|
||||
return 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case META_ATOMIC:
|
||||
case META_CAPTURE:
|
||||
case META_COND_ASSERT:
|
||||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
case META_LOOKAHEAD_NA:
|
||||
case META_NOCAPTURE:
|
||||
case META_SCRIPT_RUN:
|
||||
nestlevel++;
|
||||
break;
|
||||
|
||||
case META_ACCEPT:
|
||||
case META_ALT:
|
||||
case META_ASTERISK:
|
||||
case META_ASTERISK_PLUS:
|
||||
case META_ASTERISK_QUERY:
|
||||
case META_ATOMIC:
|
||||
case META_BACKREF:
|
||||
case META_CAPTURE:
|
||||
case META_CIRCUMFLEX:
|
||||
case META_CLASS:
|
||||
case META_CLASS_EMPTY:
|
||||
|
@ -9497,15 +9524,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|||
case META_CLASS_END:
|
||||
case META_CLASS_NOT:
|
||||
case META_COMMIT:
|
||||
case META_COND_ASSERT:
|
||||
case META_DOLLAR:
|
||||
case META_DOT:
|
||||
case META_FAIL:
|
||||
case META_KET:
|
||||
case META_LOOKAHEAD:
|
||||
case META_LOOKAHEADNOT:
|
||||
case META_LOOKAHEAD_NA:
|
||||
case META_NOCAPTURE:
|
||||
case META_PLUS:
|
||||
case META_PLUS_PLUS:
|
||||
case META_PLUS_QUERY:
|
||||
|
@ -9515,7 +9536,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
|||
case META_QUERY_QUERY:
|
||||
case META_RANGE_ESCAPED:
|
||||
case META_RANGE_LITERAL:
|
||||
case META_SCRIPT_RUN:
|
||||
case META_SKIP:
|
||||
case META_THEN:
|
||||
break;
|
||||
|
@ -10021,7 +10041,7 @@ lengths. */
|
|||
|
||||
if (has_lookbehind)
|
||||
{
|
||||
errorcode = check_lookbehinds(&cb);
|
||||
errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
|
||||
if (errorcode != 0) goto HAD_CB_ERROR;
|
||||
}
|
||||
|
||||
|
|
|
@ -6377,4 +6377,10 @@ ef) x/x,mark
|
|||
/(?<=a(*SKIP)x)|d/
|
||||
abcd
|
||||
|
||||
/(?<=(?=.(?<=x)))/aftertext
|
||||
abx
|
||||
|
||||
/(?<=(?=(?<=a)))b/
|
||||
ab
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -5690,4 +5690,7 @@ a)"xI
|
|||
|
||||
# ----
|
||||
|
||||
/(?<=(?=.(?<=x)))/
|
||||
ab\=ph
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -10097,4 +10097,13 @@ No match
|
|||
abcd
|
||||
0: d
|
||||
|
||||
/(?<=(?=.(?<=x)))/aftertext
|
||||
abx
|
||||
0:
|
||||
0+ x
|
||||
|
||||
/(?<=(?=(?<=a)))b/
|
||||
ab
|
||||
0: b
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -17185,6 +17185,10 @@ Subject length lower bound = 1
|
|||
|
||||
# ----
|
||||
|
||||
/(?<=(?=.(?<=x)))/
|
||||
ab\=ph
|
||||
No match
|
||||
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue