Fix crash for forward reference in lookbehind with PCRE2_ANCHORED. Fixes oss-fuzz issue 865.
This commit is contained in:
Philip.Hazel 2017-03-16 17:17:47 +00:00
parent ee1f7900e9
commit d7da2dcb37
4 changed files with 45 additions and 17 deletions

View File

@ -46,6 +46,12 @@ available for backwards compatibility.
for a character with a code point greater than 0x10ffff (the Unicode maximum) for a character with a code point greater than 0x10ffff (the Unicode maximum)
caused a crash. caused a crash.
5. If a lookbehind assertion that contained a back reference to a group
appearing later in the pattern was compiled with the PCRE2_ANCHORED option,
undefined actions (often a segmentation fault) could occur, depending on what
other options were set. An example assertion is (?<!\1(abc)) where the
reference \1 precedes the group (abc). This fixes oss-fuzz issue 865.
Version 10.23 14-February-2017 Version 10.23 14-February-2017
------------------------------ ------------------------------

View File

@ -8090,6 +8090,10 @@ the end of the branch, it is called to skip over an internal lookaround, and it
is also called to skip to the end of a class, during which it will never is also called to skip to the end of a class, during which it will never
encounter nested groups (but there's no need to have special code for that). encounter nested groups (but there's no need to have special code for that).
When called to find the end of a branch or group, pptr must point to the first
meta code inside the branch, not the branch-starting code. In other cases it
can point to the item that causes the function to be called.
Arguments: Arguments:
pptr current pointer to skip from pptr current pointer to skip from
skiptype PSKIP_CLASS when skipping to end of class skiptype PSKIP_CLASS when skipping to end of class
@ -8106,7 +8110,7 @@ parsed_skip(uint32_t *pptr, uint32_t skiptype)
{ {
uint32_t nestlevel = 0; uint32_t nestlevel = 0;
for (pptr += 1;; pptr++) for (;; pptr++)
{ {
uint32_t meta = META_CODE(*pptr); uint32_t meta = META_CODE(*pptr);
@ -8201,11 +8205,12 @@ return pptr;
/* This is called for nested groups within a branch of a lookbehind whose /* This is called for nested groups within a branch of a lookbehind whose
length is being computed. If all the branches in the nested group have the same length is being computed. If all the branches in the nested group have the same
length, that is OK. On entry, the pointer must be at the first element after length, that is OK. On entry, the pointer must be at the first element after
the group initializing code. Caching is used to improve processing speed when the group initializing code. On exit it points to OP_KET. Caching is used to
the same capturing group occurs many times. improve processing speed when the same capturing group occurs many times.
Arguments: Arguments:
pptrptr pointer to pointer in the parsed pattern pptrptr pointer to pointer in the parsed pattern
isinline FALSE if a reference or recursion; TRUE for inline group
errcodeptr pointer to the errorcode errcodeptr pointer to the errorcode
lcptr pointer to the loop counter lcptr pointer to the loop counter
group number of captured group or -1 for a non-capturing group group number of captured group or -1 for a non-capturing group
@ -8216,27 +8221,29 @@ Returns: the group length or a negative number
*/ */
static int static int
get_grouplength(uint32_t **pptrptr, int *errcodeptr, int *lcptr, get_grouplength(uint32_t **pptrptr, BOOL isinline, int *errcodeptr, int *lcptr,
int group, parsed_recurse_check *recurses, compile_block *cb) int group, parsed_recurse_check *recurses, compile_block *cb)
{ {
int branchlength; int branchlength;
int grouplength = -1; int grouplength = -1;
/* The cache can be used only if there is no possibility of there being two /* The cache can be used only if there is no possibility of there being two
groups with the same number. */ groups with the same number. We do not need to set the end pointer for a group
that is being processed as a back reference or recursion, but we must do so for
an inline group. */
if (group > 0) if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
{ {
uint32_t groupinfo = cb->groupinfo[group]; uint32_t groupinfo = cb->groupinfo[group];
if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0) if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1;
if ((groupinfo & GI_SET_FIXED_LENGTH) != 0)
{ {
if ((groupinfo & GI_NOT_FIXED_LENGTH) != 0) return -1; if (isinline) *pptrptr = parsed_skip(*pptrptr, PSKIP_KET);
if ((groupinfo & GI_SET_FIXED_LENGTH) != 0) return groupinfo & GI_FIXED_LENGTH_MASK;
return groupinfo & GI_FIXED_LENGTH_MASK;
} }
} }
/* Scan the group */ /* Scan the group. In this case we find the end pointer of necessity. */
for(;;) for(;;)
{ {
@ -8394,11 +8401,12 @@ for (;; pptr++)
} }
break; break;
/* Lookaheads can be ignored. */ /* Lookaheads can be ignored, but we must start the skip inside the group
so that it isn't treated as a group within the branch. */
case META_LOOKAHEAD: case META_LOOKAHEAD:
case META_LOOKAHEADNOT: case META_LOOKAHEADNOT:
pptr = parsed_skip(pptr, PSKIP_KET); pptr = parsed_skip(pptr + 1, PSKIP_KET);
if (pptr == NULL) goto PARSED_SKIP_FAILED; if (pptr == NULL) goto PARSED_SKIP_FAILED;
break; break;
@ -8496,15 +8504,24 @@ for (;; pptr++)
else if (*gptr == (META_CAPTURE | group)) break; else if (*gptr == (META_CAPTURE | group)) break;
} }
gptrend = parsed_skip(gptr, PSKIP_KET); /* We must start the search for the end of the group at the first meta code
inside the group. Otherwise it will be treated as an enclosed group. */
gptrend = parsed_skip(gptr + 1, PSKIP_KET);
if (gptrend == NULL) goto PARSED_SKIP_FAILED; if (gptrend == NULL) goto PARSED_SKIP_FAILED;
if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */ if (pptr > gptr && pptr < gptrend) goto ISNOTFIXED; /* Local recursion */
for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break; for (r = recurses; r != NULL; r = r->prev) if (r->groupptr == gptr) break;
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */ if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
this_recurse.prev = recurses; this_recurse.prev = recurses;
this_recurse.groupptr = gptr; this_recurse.groupptr = gptr;
/* We do not need to know the position of the end of the group, that is,
gptr is not used after the call to get_grouplength(). Setting the second
argument FALSE stops it scanning for the end when the length can be found
in the cache. */
gptr++; gptr++;
grouplength = get_grouplength(&gptr, errcodeptr, lcptr, group, grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
&this_recurse, cb); &this_recurse, cb);
if (grouplength < 0) if (grouplength < 0)
{ {
@ -8541,7 +8558,8 @@ for (;; pptr++)
case META_NOCAPTURE: case META_NOCAPTURE:
pptr++; pptr++;
CHECK_GROUP: CHECK_GROUP:
grouplength = get_grouplength(&pptr, errcodeptr, lcptr, group, recurses, cb); grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
recurses, cb);
if (grouplength < 0) return -1; if (grouplength < 0) return -1;
itemlength = grouplength; itemlength = grouplength;
break; break;

2
testdata/testinput2 vendored
View File

@ -5015,4 +5015,6 @@ a)"xI
/\[(a)]{60}/expand /\[(a)]{60}/expand
aaaa aaaa
/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
# End of testinput2 # End of testinput2

View File

@ -15568,6 +15568,8 @@ Subject length lower bound = 11
aaaa aaaa
No match No match
/(?<!\1((?U)1((?U))))(*F)/never_backslash_c,alt_bsux,anchored,extended
# End of testinput2 # End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data