Fix crash for pattern with very many captures. Fixes oss-fuzz issue 783.
This commit is contained in:
parent
143c136ac6
commit
26e92bc554
|
@ -24,6 +24,12 @@ released code, but are noted here for the record.
|
||||||
a match, because the external block was being set from non-existent
|
a match, because the external block was being set from non-existent
|
||||||
internal ovector fields. Fixes oss-fuzz issue 781.
|
internal ovector fields. Fixes oss-fuzz issue 781.
|
||||||
|
|
||||||
|
(b) A pattern with very many capturing parentheses (when the internal frame
|
||||||
|
size was greater than the initial frame vector on the stack) caused a
|
||||||
|
crash. A vector on the heap is now set up at the start of matching if the
|
||||||
|
vector on the stack is not big enough to handle at least 10 frames.
|
||||||
|
Fixes oss-fuzz issue 783.
|
||||||
|
|
||||||
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
2. Hardened pcre2test so as to reduce the number of bugs reported by fuzzers:
|
||||||
|
|
||||||
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
(a) Check for malloc failures when getting memory for the ovector (POSIX) or
|
||||||
|
|
|
@ -6197,45 +6197,6 @@ mb->name_count = re->name_count;
|
||||||
mb->name_entry_size = re->name_entry_size;
|
mb->name_entry_size = re->name_entry_size;
|
||||||
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
|
mb->start_code = mb->name_table + re->name_count * re->name_entry_size;
|
||||||
|
|
||||||
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
|
||||||
vector at the end, whose size depends on the number of capturing parentheses in
|
|
||||||
the pattern. It is not used at all if there are no capturing parentheses.
|
|
||||||
|
|
||||||
frame_size is the total size of each frame
|
|
||||||
mb->frame_vector_size is the total usable size of the vector (rounded down
|
|
||||||
to a whole number of frames)
|
|
||||||
|
|
||||||
The last of these may be changed if the frame vector has to be expanded. We
|
|
||||||
therefore put it into the match block so that it is correct when calling
|
|
||||||
match() more than once for non-anchored patterns. */
|
|
||||||
|
|
||||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
|
||||||
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
|
||||||
|
|
||||||
/* Set up the initial frame set. Write to the ovector within the first frame to
|
|
||||||
mark every capture unset and to avoid uninitialized memory read errors when it
|
|
||||||
is copied to a new frame. */
|
|
||||||
|
|
||||||
memset((char *)(mb->stack_frames) + offsetof(heapframe,ovector), 0xff,
|
|
||||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
|
||||||
mb->match_frames = mb->stack_frames;
|
|
||||||
mb->match_frames_top =
|
|
||||||
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
|
||||||
|
|
||||||
/* Limits set in the pattern override the match context only if they are
|
|
||||||
smaller. */
|
|
||||||
|
|
||||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
|
||||||
mcontext->match_limit : re->limit_match;
|
|
||||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
|
||||||
mcontext->recursion_limit : re->limit_recursion;
|
|
||||||
|
|
||||||
/* Pointers to the individual character tables */
|
|
||||||
|
|
||||||
mb->lcc = re->tables + lcc_offset;
|
|
||||||
mb->fcc = re->tables + fcc_offset;
|
|
||||||
mb->ctypes = re->tables + ctypes_offset;
|
|
||||||
|
|
||||||
/* Process the \R and newline settings. */
|
/* Process the \R and newline settings. */
|
||||||
|
|
||||||
mb->bsr_convention = re->bsr_convention;
|
mb->bsr_convention = re->bsr_convention;
|
||||||
|
@ -6269,6 +6230,60 @@ switch(re->newline_convention)
|
||||||
default: return PCRE2_ERROR_INTERNAL;
|
default: return PCRE2_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The backtracking frames have fixed data at the front, and a PCRE2_SIZE
|
||||||
|
vector at the end, whose size depends on the number of capturing parentheses in
|
||||||
|
the pattern. It is not used at all if there are no capturing parentheses.
|
||||||
|
|
||||||
|
frame_size is the total size of each frame
|
||||||
|
mb->frame_vector_size is the total usable size of the vector (rounded down
|
||||||
|
to a whole number of frames)
|
||||||
|
|
||||||
|
mb->frame_vector_size is changed within the match() function if the frame vector
|
||||||
|
has to be expanded. We therefore put it into the match block so that it is
|
||||||
|
correct when calling match() more than once for non-anchored patterns. */
|
||||||
|
|
||||||
|
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
|
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||||
|
large. Ensure that there are at least 10 available frames by getting an initial
|
||||||
|
vector on the heap if necessary. */
|
||||||
|
|
||||||
|
if (frame_size <= START_FRAMES_SIZE/10)
|
||||||
|
{
|
||||||
|
mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
|
||||||
|
mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
mb->frame_vector_size = frame_size * 10;
|
||||||
|
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||||
|
mb->memctl.memory_data);
|
||||||
|
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
}
|
||||||
|
|
||||||
|
mb->match_frames_top =
|
||||||
|
(heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
|
||||||
|
|
||||||
|
/* Write to the ovector within the first frame to mark every capture unset and
|
||||||
|
to avoid uninitialized memory read errors when it is copied to a new frame. */
|
||||||
|
|
||||||
|
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||||
|
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
|
/* Limits set in the pattern override the match context only if they are
|
||||||
|
smaller. */
|
||||||
|
|
||||||
|
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||||
|
mcontext->match_limit : re->limit_match;
|
||||||
|
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||||
|
mcontext->recursion_limit : re->limit_recursion;
|
||||||
|
|
||||||
|
/* Pointers to the individual character tables */
|
||||||
|
|
||||||
|
mb->lcc = re->tables + lcc_offset;
|
||||||
|
mb->fcc = re->tables + fcc_offset;
|
||||||
|
mb->ctypes = re->tables + ctypes_offset;
|
||||||
|
|
||||||
/* Set up the first code unit to match, if available. The first_codeunit value
|
/* Set up the first code unit to match, if available. The first_codeunit value
|
||||||
is never set for an anchored regular expression, but the anchoring may be
|
is never set for an anchored regular expression, but the anchoring may be
|
||||||
forced at run time, so we have to test for anchoring. The first code unit may
|
forced at run time, so we have to test for anchoring. The first code unit may
|
||||||
|
|
|
@ -5009,4 +5009,10 @@ a)"xI
|
||||||
'(?:a(*:aa))b|ac' mark
|
'(?:a(*:aa))b|ac' mark
|
||||||
ac
|
ac
|
||||||
|
|
||||||
|
/(R?){65}/
|
||||||
|
(R?){65}
|
||||||
|
|
||||||
|
/\[(a)]{60}/expand
|
||||||
|
aaaa
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -15559,6 +15559,15 @@ Subject length lower bound = 11
|
||||||
ac
|
ac
|
||||||
0: ac
|
0: ac
|
||||||
|
|
||||||
|
/(R?){65}/
|
||||||
|
(R?){65}
|
||||||
|
0:
|
||||||
|
1:
|
||||||
|
|
||||||
|
/\[(a)]{60}/expand
|
||||||
|
aaaa
|
||||||
|
No match
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue