Fix crash for pattern with very many captures. Fixes oss-fuzz issue 783.
This commit is contained in:
parent
143c136ac6
commit
26e92bc554
|
@@ -24,11 +24,17 @@ released code, but are noted here for the record.
|
|||
a match, because the external block was being set from non-existent
|
||||
internal ovector fields. Fixes oss-fuzz issue 781.
|
||||
|
||||
(b) A pattern with very many capturing parentheses (when the internal frame
|
||||
size was greater than the initial frame vector on the stack) caused a
|
||||
crash. A vector on the heap is now set up at the start of matching if the
|
||||
vector on the stack is not big enough to handle at least 10 frames.
|
||||
Fixes oss-fuzz issue 783.
|
||||
|
||||
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
||||
|
||||
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
||||
the match data block (non-POSIX).
|
||||
|
||||
|
||||
3. In the 32-bit library in non-UTF mode, an attempt to find a Unicode property
|
||||
for a character with a code point greater than 0x10ffff (the Unicode maximum)
|
||||
caused a crash.
|
||||
|
|
|
@@ -816,9 +816,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
ovector[0] = Fstart_match - mb->start_subject;
|
||||
ovector[1] = Feptr - mb->start_subject;
|
||||
|
||||
|
||||
/* Set i to the smaller of the sizes of the external and frame ovectors. */
|
||||
|
||||
|
||||
i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
|
||||
memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
|
||||
while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
|
||||
|
@@ -5231,7 +5231,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
/* The variable Flength will be added to Fecode when the condition is
|
||||
false, to get to the second branch. Setting it to the offset to the ALT or
|
||||
KET, then incrementing Fecode achieves this effect. However, if the second
|
||||
branch is non-existent, we must point to the KET so that the end of the
|
||||
branch is non-existent, we must point to the KET so that the end of the
|
||||
group is correctly processed. We now have Fecode pointing to the condition
|
||||
or callout. */
|
||||
|
||||
|
@@ -5478,8 +5478,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
|
||||
/* If we are at the end of an assertion that is a condition, return a
|
||||
match, discarding any intermediate backtracking points. Copy back the
|
||||
captures into the frame before N so that they are set on return. Doing
|
||||
this for all assertions, both positive and negative, seems to match what
|
||||
captures into the frame before N so that they are set on return. Doing
|
||||
this for all assertions, both positive and negative, seems to match what
|
||||
Perl does. */
|
||||
|
||||
if (GF_IDMASK(N->group_frame_type) == GF_CONDASSERT)
|
||||
|
@@ -5545,7 +5545,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
case OP_SCBRA:
|
||||
case OP_SCBRAPOS:
|
||||
number = GET2(bracode, 1+LINK_SIZE);
|
||||
|
||||
|
||||
/* Handle a recursively called group. We reinstate the previous set of
|
||||
captures and then carry on. */
|
||||
|
||||
|
@@ -6197,45 +6197,6 @@ mb->name_count = re->name_count;
|
|||
mb->name_entry_size = re->name_entry_size;
|
||||
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
|
||||
|
||||
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
||||
vector at the end, whose size depends on the number of capturing parentheses in
|
||||
the pattern. It is not used at all if there are no capturing parentheses.
|
||||
|
||||
frame_size is the total size of each frame
|
||||
mb->frame_vector_size is the total usable size of the vector (rounded down
|
||||
to a whole number of frames)
|
||||
|
||||
The last of these may be changed if the frame vector has to be expanded. We
|
||||
therefore put it into the match block so that it is correct when calling
|
||||
match() more than once for non-anchored patterns. */
|
||||
|
||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
||||
|
||||
/* Set up the initial frame set. Write to the ovector within the first frame to
|
||||
mark every capture unset and to avoid uninitialized memory read errors when it
|
||||
is copied to a new frame. */
|
||||
|
||||
memset((char *)(mb->stack_frames) + offsetof(heapframe,ovector), 0xff,
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||
mb->match_frames = mb->stack_frames;
|
||||
mb->match_frames_top =
|
||||
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||
mcontext->recursion_limit : re->limit_recursion;
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
mb->lcc = re->tables + lcc_offset;
|
||||
mb->fcc = re->tables + fcc_offset;
|
||||
mb->ctypes = re->tables + ctypes_offset;
|
||||
|
||||
/* Process the \R and newline settings. */
|
||||
|
||||
mb->bsr_convention = re->bsr_convention;
|
||||
|
@@ -6269,6 +6230,60 @@ switch(re->newline_convention)
|
|||
default: return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
||||
vector at the end, whose size depends on the number of capturing parentheses in
|
||||
the pattern. It is not used at all if there are no capturing parentheses.
|
||||
|
||||
frame_size is the total size of each frame
|
||||
mb->frame_vector_size is the total usable size of the vector (rounded down
|
||||
to a whole number of frames)
|
||||
|
||||
The last of these is changed within the match() function if the frame vector
|
||||
has to be expanded. We therefore put it into the match block so that it is
|
||||
correct when calling match() more than once for non-anchored patterns. */
|
||||
|
||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||
large. Ensure that there are at least 10 available frames by getting an initial
|
||||
vector on the heap if necessary. */
|
||||
|
||||
if (frame_size <= START_FRAMES_SIZE/10)
|
||||
{
|
||||
mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
|
||||
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
mb->frame_vector_size = frame_size * 10;
|
||||
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||
mb->memctl.memory_data);
|
||||
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
mb->match_frames_top =
|
||||
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
||||
|
||||
/* Write to the ovector within the first frame to mark every capture unset and
|
||||
to avoid uninitialized memory read errors when it is copied to a new frame. */
|
||||
|
||||
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||
mcontext->recursion_limit : re->limit_recursion;
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
mb->lcc = re->tables + lcc_offset;
|
||||
mb->fcc = re->tables + fcc_offset;
|
||||
mb->ctypes = re->tables + ctypes_offset;
|
||||
|
||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||
is never set for an anchored regular expression, but the anchoring may be
|
||||
forced at run time, so we have to test for anchoring. The first code unit may
|
||||
|
|
|
@@ -5009,4 +5009,10 @@ a)"xI
|
|||
'(?:a(*:aa))b|ac' mark
|
||||
ac
|
||||
|
||||
/(R?){65}/
|
||||
(R?){65}
|
||||
|
||||
/\[(a)]{60}/expand
|
||||
aaaa
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@@ -15559,6 +15559,15 @@ Subject length lower bound = 11
|
|||
ac
|
||||
0: ac
|
||||
|
||||
/(R?){65}/
|
||||
(R?){65}
|
||||
0:
|
||||
1:
|
||||
|
||||
/\[(a)]{60}/expand
|
||||
aaaa
|
||||
No match
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue