Fix crash for forward reference in lookbehind with PCRE2_ANCHORED. Fixes
oss-fuzz issue 865.
This commit is contained in:
parent
ee1f7900e9
commit
d7da2dcb37
|
@ -46,6 +46,12 @@ available for backwards compatibility.
|
||||||
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
||||||
caused a crash.
|
caused a crash.
|
||||||
|
|
||||||
|
5. If a lookbehind assertion that contained a back reference to a group
|
||||||
|
appearing later in the pattern was compiled with the PCRE2_ANCHORED option,
|
||||||
|
undefined actions (often a segmentation fault) could occur, depending on what
|
||||||
|
other options were set. An example assertion is (?<!\1(abc)) where the
|
||||||
|
reference \1 precedes the group (abc). This fixes oss-fuzz issue 865.
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -8090,6 +8090,10 @@ the end of the branch, it is called to skip over an internal lookaround, and it
|
||||||
is also called to skip to the end of a class, during which it will never
|
is also called to skip to the end of a class, during which it will never
|
||||||
encounter nested groups (but there's no need to have special code for that).
|
encounter nested groups (but there's no need to have special code for that).
|
||||||
|
|
||||||
|
When called to find the end of a branch or group, pptr must point to the first
|
||||||
|
meta code inside the branch, not the branch-starting code. In other cases it
|
||||||
|
can point to the item that causes the function to be called.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
pptr current pointer to skip from
|
pptr current pointer to skip from
|
||||||
skiptype PSKIP_CLASS when skipping to end of class
|
skiptype PSKIP_CLASS when skipping to end of class
|
||||||
|
@ -8106,7 +8110,7 @@ parsed_skip(uint32_t *pptr, uint32_t skiptype)
|
||||||
{
|
{
|
||||||
uint32_t nestlevel = 0;
|
uint32_t nestlevel = 0;
|
||||||
|
|
||||||
for (pptr += 1;; pptr++)
|
for (;; pptr++)
|
||||||
{
|
{
|
||||||
uint32_t meta = META_CODE(*pptr);
|
uint32_t meta = META_CODE(*pptr);
|
||||||
|
|
||||||
|
@ -8201,11 +8205,12 @@ return pptr;
|
||||||
/* This is called for nested groups within a branch of a lookbehind whose
|
/* This is called for nested groups within a branch of a lookbehind whose
|
||||||
length is being computed. If all the branches in the nested group have the same
|
length is being computed. If all the branches in the nested group have the same
|
||||||
length, that is OK. On entry, the pointer must be at the first element after
|
length, that is OK. On entry, the pointer must be at the first element after
|
||||||
the group initializing code. Caching is used to improve processing speed when
|
the group initializing code. On exit it points to OP_KET. Caching is used to
|
||||||
the same capturing group occurs many times.
|
improve processing speed when the same capturing group occurs many times.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
pptrptr pointer to pointer in the parsed pattern
|
pptrptr pointer to pointer in the parsed pattern
|
||||||
|
isinline FALSE if a reference or recursion; TRUE for inline group
|
||||||
errcodeptr pointer to the errorcode
|
errcodeptr pointer to the errorcode
|
||||||
lcptr pointer to the loop counter
|
lcptr pointer to the loop counter
|
||||||
group number of captured group or -1 for a non-capturing group
|
group number of captured group or -1 for a non-capturing group
|
||||||
|
@ -8216,27 +8221,29 @@ Returns: the group length or a negative number
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
get_grouplength(uint32_t **pptrptr, int *errcodeptr, int *lcptr,
|
get_grouplength(uint32_t **pptrptr, BOOL isinline, int *errcodeptr, int *lcptr,
|
||||||
int group, parsed_recurse_check *recurses, compile_block *cb)
|
int group, parsed_recurse_check *recurses, compile_block *cb)
|
||||||
{
|
{
|
||||||
int branchlength;
|
int branchlength;
|
||||||
int grouplength = -1;
|
int grouplength = -1;
|
||||||
|
|
||||||
/* The cache can be used only if there is no possibility of there being two
|
/* The cache can be used only if there is no possibility of there being two
|
||||||
groups with the same number. */
|
groups with the same number. We do not need to set the end pointer for a group
|
||||||
|
that is being processed as a back reference or recursion, but we must do so for
|
||||||
|
an inline group. */
|
||||||
|
|
||||||
if (group > 0)
|
if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
|
||||||
{
|
{
|
||||||
uint32_t groupinfo = cb->groupinfo[group];
|
uint32_t groupinfo = cb->groupinfo[group];
|
||||||
if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0)
|
if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
|
||||||
|
if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
|
||||||
{
|
{
|
||||||
if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
|
if (isinline) *pptrptr = parsed_skip(*pptrptr, PSKIP_KET);
|
||||||
if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
|
return groupinfo & GI_FIXED_LENGTH_MASK;
|
||||||
return groupinfo & GI_FIXED_LENGTH_MASK;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scan the group */
|
/* Scan the group. In this case we find the end pointer of necessity. */
|
||||||
|
|
||||||
for(;;)
|
for(;;)
|
||||||
{
|
{
|
||||||
|
@ -8394,11 +8401,12 @@ for (;; pptr++)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Lookaheads can be ignored. */
|
/* Lookaheads can be ignored, but we must start the skip inside the group
|
||||||
|
so that it isn't treated as a group within the branch. */
|
||||||
|
|
||||||
case META_LOOKAHEAD:
|
case META_LOOKAHEAD:
|
||||||
case META_LOOKAHEADNOT:
|
case META_LOOKAHEADNOT:
|
||||||
pptr = parsed_skip(pptr, PSKIP_KET);
|
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||||
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
if (pptr == NULL) goto PARSED_SKIP_FAILED;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -8496,15 +8504,24 @@ for (;; pptr++)
|
||||||
else if (*gptr == (META_CAPTURE | group)) break;
|
else if (*gptr == (META_CAPTURE | group)) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
gptrend = parsed_skip(gptr, PSKIP_KET);
|
/* We must start the search for the end of the group at the first meta code
|
||||||
|
inside the group. Otherwise it will be treated as an enclosed group. */
|
||||||
|
|
||||||
|
gptrend = parsed_skip(gptr + 1, PSKIP_KET);
|
||||||
if (gptrend == NULL) goto PARSED_SKIP_FAILED;
|
if (gptrend == NULL) goto PARSED_SKIP_FAILED;
|
||||||
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
|
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
|
||||||
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
|
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
|
||||||
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
|
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
|
||||||
this_recurse.prev = recurses;
|
this_recurse.prev = recurses;
|
||||||
this_recurse.groupptr = gptr;
|
this_recurse.groupptr = gptr;
|
||||||
|
|
||||||
|
/* We do not need to know the position of the end of the group, that is,
|
||||||
|
gptr is not used after the call to get_grouplength(). Setting the second
|
||||||
|
argument FALSE stops it scanning for the end when the length can be found
|
||||||
|
in the cache. */
|
||||||
|
|
||||||
gptr++;
|
gptr++;
|
||||||
grouplength = get_grouplength(&gptr, errcodeptr, lcptr, group,
|
grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
|
||||||
&this_recurse, cb);
|
&this_recurse, cb);
|
||||||
if (grouplength < 0)
|
if (grouplength < 0)
|
||||||
{
|
{
|
||||||
|
@ -8541,7 +8558,8 @@ for (;; pptr++)
|
||||||
case META_NOCAPTURE:
|
case META_NOCAPTURE:
|
||||||
pptr++;
|
pptr++;
|
||||||
CHECK_GROUP:
|
CHECK_GROUP:
|
||||||
grouplength = get_grouplength(&pptr, errcodeptr, lcptr, group, recurses, cb);
|
grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
|
||||||
|
recurses, cb);
|
||||||
if (grouplength < 0) return -1;
|
if (grouplength < 0) return -1;
|
||||||
itemlength = grouplength;
|
itemlength = grouplength;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -5015,4 +5015,6 @@ a)"xI
|
||||||
/\[(a)]{60}/expand
|
/\[(a)]{60}/expand
|
||||||
aaaa
|
aaaa
|
||||||
|
|
||||||
|
/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -15568,6 +15568,8 @@ Subject length lower bound = 11
|
||||||
aaaa
|
aaaa
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue