Fix a crash when matching a pattern with very many capturing groups. Fixes oss-fuzz issue 783.
This commit is contained in:
parent
143c136ac6
commit
26e92bc554
|
@ -24,11 +24,17 @@ released code, but are noted here for the record.
|
||||||
a match, because the external block was being set from non-existent
|
a match, because the external block was being set from non-existent
|
||||||
internal ovector fields. Fixes oss-fuzz issue 781.
|
internal ovector fields. Fixes oss-fuzz issue 781.
|
||||||
|
|
||||||
|
(b) A pattern with very many capturing parentheses (when the internal frame
|
||||||
|
size was greater than the initial frame vector on the stack) caused a
|
||||||
|
crash. A vector on the heap is now set up at the start of matching if the
|
||||||
|
vector on the stack is not big enough to handle at least 10 frames.
|
||||||
|
Fixes oss-fuzz issue 783.
|
||||||
|
|
||||||
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
||||||
|
|
||||||
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
||||||
the match data block (non-POSIX).
|
the match data block (non-POSIX).
|
||||||
|
|
||||||
3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
||||||
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
||||||
caused a crash.
|
caused a crash.
|
||||||
|
|
|
@ -816,9 +816,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
ovector[0] = Fstart_match - mb->start_subject;
|
ovector[0] = Fstart_match - mb->start_subject;
|
||||||
ovector[1] = Feptr - mb->start_subject;
|
ovector[1] = Feptr - mb->start_subject;
|
||||||
|
|
||||||
/* Set i to the smaller of the sizes of the external and frame ovectors. */
|
/* Set i to the smaller of the sizes of the external and frame ovectors. */
|
||||||
|
|
||||||
i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
|
i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
|
||||||
memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
|
memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
|
||||||
while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
|
while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
|
||||||
|
@ -5231,7 +5231,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
/* The variable Flength will be added to Fecode when the condition is
|
/* The variable Flength will be added to Fecode when the condition is
|
||||||
false, to get to the second branch. Setting it to the offset to the ALT or
|
false, to get to the second branch. Setting it to the offset to the ALT or
|
||||||
KET, then incrementing Fecode achieves this effect. However, if the second
|
KET, then incrementing Fecode achieves this effect. However, if the second
|
||||||
branch is non-existent, we must point to the KET so that the end of the
|
branch is non-existent, we must point to the KET so that the end of the
|
||||||
group is correctly processed. We now have Fecode pointing to the condition
|
group is correctly processed. We now have Fecode pointing to the condition
|
||||||
or callout. */
|
or callout. */
|
||||||
|
|
||||||
|
@ -5478,8 +5478,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
|
|
||||||
/* If we are at the end of an assertion that is a condition, return a
|
/* If we are at the end of an assertion that is a condition, return a
|
||||||
match, discarding any intermediate backtracking points. Copy back the
|
match, discarding any intermediate backtracking points. Copy back the
|
||||||
captures into the frame before N so that they are set on return. Doing
|
captures into the frame before N so that they are set on return. Doing
|
||||||
this for all assertions, both positive and negative, seems to match what
|
this for all assertions, both positive and negative, seems to match what
|
||||||
Perl does. */
|
Perl does. */
|
||||||
|
|
||||||
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
|
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
|
||||||
|
@ -5545,7 +5545,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||||
case OP_SCBRA:
|
case OP_SCBRA:
|
||||||
case OP_SCBRAPOS:
|
case OP_SCBRAPOS:
|
||||||
number = GET2(bracode, 1+LINK_SIZE);
|
number = GET2(bracode, 1+LINK_SIZE);
|
||||||
|
|
||||||
/* Handle a recursively called group. We reinstate the previous set of
|
/* Handle a recursively called group. We reinstate the previous set of
|
||||||
captures and then carry on. */
|
captures and then carry on. */
|
||||||
|
|
||||||
|
@ -6197,45 +6197,6 @@ mb->name_count = re->name_count;
|
||||||
mb->name_entry_size = re->name_entry_size;
|
mb->name_entry_size = re->name_entry_size;
|
||||||
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
|
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
|
||||||
|
|
||||||
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
|
||||||
vector at the end, whose size depends on the number of capturing parentheses in
|
|
||||||
the pattern. It is not used at all if there are no capturing parentheses.
|
|
||||||
|
|
||||||
frame_size is the total size of each frame
|
|
||||||
mb->frame_vector_size is the total usable size of the vector (rounded down
|
|
||||||
to a whole number of frames)
|
|
||||||
|
|
||||||
The last of these may be changed if the frame vector has to be expanded. We
|
|
||||||
therefore put it into the match block so that it is correct when calling
|
|
||||||
match() more than once for non-anchored patterns. */
|
|
||||||
|
|
||||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
|
||||||
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
|
||||||
|
|
||||||
/* Set up the initial frame set. Write to the ovector within the first frame to
|
|
||||||
mark every capture unset and to avoid uninitialized memory read errors when it
|
|
||||||
is copied to a new frame. */
|
|
||||||
|
|
||||||
memset((char *)(mb->stack_frames) + offsetof(heapframe,ovector), 0xff,
|
|
||||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
|
||||||
mb->match_frames = mb->stack_frames;
|
|
||||||
mb->match_frames_top =
|
|
||||||
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
|
||||||
|
|
||||||
/* Limits set in the pattern override the match context only if they are
|
|
||||||
smaller. */
|
|
||||||
|
|
||||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
|
||||||
mcontext->match_limit : re->limit_match;
|
|
||||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
|
||||||
mcontext->recursion_limit : re->limit_recursion;
|
|
||||||
|
|
||||||
/* Pointers to the individual character tables */
|
|
||||||
|
|
||||||
mb->lcc = re->tables + lcc_offset;
|
|
||||||
mb->fcc = re->tables + fcc_offset;
|
|
||||||
mb->ctypes = re->tables + ctypes_offset;
|
|
||||||
|
|
||||||
/* Process the \R and newline settings. */
|
/* Process the \R and newline settings. */
|
||||||
|
|
||||||
mb->bsr_convention = re->bsr_convention;
|
mb->bsr_convention = re->bsr_convention;
|
||||||
|
@ -6269,6 +6230,60 @@ switch(re->newline_convention)
|
||||||
default: return PCRE2_ERROR_INTERNAL;
|
default: return PCRE2_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
||||||
|
vector at the end, whose size depends on the number of capturing parentheses in
|
||||||
|
the pattern. It is not used at all if there are no capturing parentheses.
|
||||||
|
|
||||||
|
frame_size is the total size of each frame
|
||||||
|
mb->frame_vector_size is the total usable size of the vector (rounded down
|
||||||
|
to a whole number of frames)
|
||||||
|
|
||||||
|
The last of these is changed within the match() function if the frame vector
|
||||||
|
has to be expanded. We therefore put it into the match block so that it is
|
||||||
|
correct when calling match() more than once for non-anchored patterns. */
|
||||||
|
|
||||||
|
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
|
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||||
|
large. Ensure that there are at least 10 available frames by getting an initial
|
||||||
|
vector on the heap if necessary. */
|
||||||
|
|
||||||
|
if (frame_size <= START_FRAMES_SIZE/10)
|
||||||
|
{
|
||||||
|
mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
|
||||||
|
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mb->frame_vector_size = frame_size * 10;
|
||||||
|
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||||
|
mb->memctl.memory_data);
|
||||||
|
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
mb->match_frames_top =
|
||||||
|
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
||||||
|
|
||||||
|
/* Write to the ovector within the first frame to mark every capture unset and
|
||||||
|
to avoid uninitialized memory read errors when it is copied to a new frame. */
|
||||||
|
|
||||||
|
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||||
|
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
|
/* Limits set in the pattern override the match context only if they are
|
||||||
|
smaller. */
|
||||||
|
|
||||||
|
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||||
|
mcontext->match_limit : re->limit_match;
|
||||||
|
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||||
|
mcontext->recursion_limit : re->limit_recursion;
|
||||||
|
|
||||||
|
/* Pointers to the individual character tables */
|
||||||
|
|
||||||
|
mb->lcc = re->tables + lcc_offset;
|
||||||
|
mb->fcc = re->tables + fcc_offset;
|
||||||
|
mb->ctypes = re->tables + ctypes_offset;
|
||||||
|
|
||||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||||
is never set for an anchored regular expression, but the anchoring may be
|
is never set for an anchored regular expression, but the anchoring may be
|
||||||
forced at run time, so we have to test for anchoring. The first code unit may
|
forced at run time, so we have to test for anchoring. The first code unit may
|
||||||
|
|
|
@ -5009,4 +5009,10 @@ a)"xI
|
||||||
'(?:a(*:aa))b|ac' mark
|
'(?:a(*:aa))b|ac' mark
|
||||||
ac
|
ac
|
||||||
|
|
||||||
|
/(R?){65}/
|
||||||
|
(R?){65}
|
||||||
|
|
||||||
|
/\[(a)]{60}/expand
|
||||||
|
aaaa
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -15559,6 +15559,15 @@ Subject length lower bound = 11
|
||||||
ac
|
ac
|
||||||
0: ac
|
0: ac
|
||||||
|
|
||||||
|
/(R?){65}/
|
||||||
|
(R?){65}
|
||||||
|
0:
|
||||||
|
1:
|
||||||
|
|
||||||
|
/\[(a)]{60}/expand
|
||||||
|
aaaa
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue