Fix lookbehind within lookahead within lookbehind misbehaviour bug.
This commit is contained in:
parent
66811c6c73
commit
046c5cd21c
|
@ -90,6 +90,11 @@ Clusterfuzz issue 15636.
|
||||||
|
|
||||||
18. Implement non-atomic positive lookaround assertions.
|
18. Implement non-atomic positive lookaround assertions.
|
||||||
|
|
||||||
|
19. If a lookbehind contained a lookahead that contained another lookbehind
|
||||||
|
within it, the nested lookbehind was not correctly processed. For example, if
|
||||||
|
/(?<=(?=(?<=a)))b/ was matched to "ab" it gave no match instead of matching
|
||||||
|
"b".
|
||||||
|
|
||||||
|
|
||||||
Version 10.33 16-April-2019
|
Version 10.33 16-April-2019
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -135,6 +135,8 @@ static BOOL
|
||||||
set_lookbehind_lengths(uint32_t **, int *, int *, int *,
|
set_lookbehind_lengths(uint32_t **, int *, int *, int *,
|
||||||
parsed_recurse_check *, compile_block *);
|
parsed_recurse_check *, compile_block *);
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_lookbehinds(uint32_t *, uint32_t **, compile_block *);
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -9108,16 +9110,16 @@ for (;; pptr++)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Lookaheads can be ignored, but we must start the skip inside the group
|
/* Lookaheads do not contribute to the length of this branch, but they may
|
||||||
so that it isn't treated as a group within the branch. */
|
contain lookbehinds within them whose lengths need to be set. */
|
||||||
|
|
||||||
case META_LOOKAHEAD:
|
case META_LOOKAHEAD:
|
||||||
case META_LOOKAHEADNOT:
|
case META_LOOKAHEADNOT:
|
||||||
case META_LOOKAHEAD_NA:
|
case META_LOOKAHEAD_NA:
|
||||||
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
*errcodeptr = check_lookbehinds(pptr + 1, &pptr, cb);
|
||||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
if (*errcodeptr != 0) return -1;
|
||||||
|
|
||||||
/* Also ignore any qualifiers that follow a lookahead assertion. */
|
/* Ignore any qualifiers that follow a lookahead assertion. */
|
||||||
|
|
||||||
switch (pptr[1])
|
switch (pptr[1])
|
||||||
{
|
{
|
||||||
|
@ -9454,21 +9456,29 @@ set_lookbehind_lengths() for each one. At the start, the errorcode is zero and
|
||||||
the error offset is marked unset. This enables the functions above not to
|
the error offset is marked unset. This enables the functions above not to
|
||||||
override settings from deeper nestings.
|
override settings from deeper nestings.
|
||||||
|
|
||||||
Arguments cb points to the compile block
|
This function is called recursively from get_branchlength() for lookaheads in
|
||||||
|
order to process any lookbehinds that they may contain. It stops when it hits a
|
||||||
|
non-nested closing parenthesis in this case, returning a pointer to it.
|
||||||
|
|
||||||
|
Arguments
|
||||||
|
pptr points to where to start (start of pattern or start of lookahead)
|
||||||
|
retptr if not NULL, return the ket pointer here
|
||||||
|
cb points to the compile block
|
||||||
|
|
||||||
Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
check_lookbehinds(compile_block *cb)
|
check_lookbehinds(uint32_t *pptr, uint32_t **retptr, compile_block *cb)
|
||||||
{
|
{
|
||||||
uint32_t *pptr;
|
|
||||||
int max;
|
int max;
|
||||||
int errorcode = 0;
|
int errorcode = 0;
|
||||||
int loopcount = 0;
|
int loopcount = 0;
|
||||||
|
int nestlevel = 0;
|
||||||
|
|
||||||
cb->erroroffset = PCRE2_UNSET;
|
cb->erroroffset = PCRE2_UNSET;
|
||||||
|
|
||||||
for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
for (; *pptr != META_END; pptr++)
|
||||||
{
|
{
|
||||||
if (*pptr < META_END) continue; /* Literal */
|
if (*pptr < META_END) continue; /* Literal */
|
||||||
|
|
||||||
|
@ -9482,14 +9492,31 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||||
pptr += 1;
|
pptr += 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case META_KET:
|
||||||
|
if (--nestlevel < 0)
|
||||||
|
{
|
||||||
|
if (retptr != NULL) *retptr = pptr;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case META_ATOMIC:
|
||||||
|
case META_CAPTURE:
|
||||||
|
case META_COND_ASSERT:
|
||||||
|
case META_LOOKAHEAD:
|
||||||
|
case META_LOOKAHEADNOT:
|
||||||
|
case META_LOOKAHEAD_NA:
|
||||||
|
case META_NOCAPTURE:
|
||||||
|
case META_SCRIPT_RUN:
|
||||||
|
nestlevel++;
|
||||||
|
break;
|
||||||
|
|
||||||
case META_ACCEPT:
|
case META_ACCEPT:
|
||||||
case META_ALT:
|
case META_ALT:
|
||||||
case META_ASTERISK:
|
case META_ASTERISK:
|
||||||
case META_ASTERISK_PLUS:
|
case META_ASTERISK_PLUS:
|
||||||
case META_ASTERISK_QUERY:
|
case META_ASTERISK_QUERY:
|
||||||
case META_ATOMIC:
|
|
||||||
case META_BACKREF:
|
case META_BACKREF:
|
||||||
case META_CAPTURE:
|
|
||||||
case META_CIRCUMFLEX:
|
case META_CIRCUMFLEX:
|
||||||
case META_CLASS:
|
case META_CLASS:
|
||||||
case META_CLASS_EMPTY:
|
case META_CLASS_EMPTY:
|
||||||
|
@ -9497,15 +9524,9 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||||
case META_CLASS_END:
|
case META_CLASS_END:
|
||||||
case META_CLASS_NOT:
|
case META_CLASS_NOT:
|
||||||
case META_COMMIT:
|
case META_COMMIT:
|
||||||
case META_COND_ASSERT:
|
|
||||||
case META_DOLLAR:
|
case META_DOLLAR:
|
||||||
case META_DOT:
|
case META_DOT:
|
||||||
case META_FAIL:
|
case META_FAIL:
|
||||||
case META_KET:
|
|
||||||
case META_LOOKAHEAD:
|
|
||||||
case META_LOOKAHEADNOT:
|
|
||||||
case META_LOOKAHEAD_NA:
|
|
||||||
case META_NOCAPTURE:
|
|
||||||
case META_PLUS:
|
case META_PLUS:
|
||||||
case META_PLUS_PLUS:
|
case META_PLUS_PLUS:
|
||||||
case META_PLUS_QUERY:
|
case META_PLUS_QUERY:
|
||||||
|
@ -9515,7 +9536,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
|
||||||
case META_QUERY_QUERY:
|
case META_QUERY_QUERY:
|
||||||
case META_RANGE_ESCAPED:
|
case META_RANGE_ESCAPED:
|
||||||
case META_RANGE_LITERAL:
|
case META_RANGE_LITERAL:
|
||||||
case META_SCRIPT_RUN:
|
|
||||||
case META_SKIP:
|
case META_SKIP:
|
||||||
case META_THEN:
|
case META_THEN:
|
||||||
break;
|
break;
|
||||||
|
@ -10021,7 +10041,7 @@ lengths. */
|
||||||
|
|
||||||
if (has_lookbehind)
|
if (has_lookbehind)
|
||||||
{
|
{
|
||||||
errorcode = check_lookbehinds(&cb);
|
errorcode = check_lookbehinds(cb.parsed_pattern, NULL, &cb);
|
||||||
if (errorcode != 0) goto HAD_CB_ERROR;
|
if (errorcode != 0) goto HAD_CB_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6377,4 +6377,10 @@ ef) x/x,mark
|
||||||
/(?<=a(*SKIP)x)|d/
|
/(?<=a(*SKIP)x)|d/
|
||||||
abcd
|
abcd
|
||||||
|
|
||||||
|
/(?<=(?=.(?<=x)))/aftertext
|
||||||
|
abx
|
||||||
|
|
||||||
|
/(?<=(?=(?<=a)))b/
|
||||||
|
ab
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -5690,4 +5690,7 @@ a)"xI
|
||||||
|
|
||||||
# ----
|
# ----
|
||||||
|
|
||||||
|
/(?<=(?=.(?<=x)))/
|
||||||
|
ab\=ph
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -10097,4 +10097,13 @@ No match
|
||||||
abcd
|
abcd
|
||||||
0: d
|
0: d
|
||||||
|
|
||||||
|
/(?<=(?=.(?<=x)))/aftertext
|
||||||
|
abx
|
||||||
|
0:
|
||||||
|
0+ x
|
||||||
|
|
||||||
|
/(?<=(?=(?<=a)))b/
|
||||||
|
ab
|
||||||
|
0: b
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -17185,6 +17185,10 @@ Subject length lower bound = 1
|
||||||
|
|
||||||
# ----
|
# ----
|
||||||
|
|
||||||
|
/(?<=(?=.(?<=x)))/
|
||||||
|
ab\=ph
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue