Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1859.

This commit is contained in:
Philip.Hazel 2017-05-27 15:49:29 +00:00
parent 3d80fa4fc2
commit 3f96e07e6b
4 changed files with 53 additions and 26 deletions

View File

@ -171,6 +171,9 @@ one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
34. Implement newline type PCRE2_NEWLINE_NUL. 34. Implement newline type PCRE2_NEWLINE_NUL.
35. A lookbehind assertion that had a zero-length branch caused undefined
behaviour when processed by pcre2_dfa_match(). This is oss-fuzz issue 1859.
Version 10.23 14-February-2017 Version 10.23 14-February-2017
------------------------------ ------------------------------

View File

@ -375,14 +375,10 @@ internal_dfa_match(
{ {
stateblock *active_states, *new_states, *temp_states; stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state; stateblock *next_active_state, *next_new_state;
const uint8_t *ctypes, *lcc, *fcc; const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr; PCRE2_SPTR ptr;
PCRE2_SPTR end_code; PCRE2_SPTR end_code;
PCRE2_SPTR first_op;
dfa_recursion_info new_recursive; dfa_recursion_info new_recursive;
int active_count, new_count, match_count; int active_count, new_count, match_count;
/* Some fields in the mb block are frequently referenced, so we load them into /* Some fields in the mb block are frequently referenced, so we load them into
@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2);
next_new_state = new_states = active_states + wscount; next_new_state = new_states = active_states + wscount;
new_count = 0; new_count = 0;
first_op = this_start_code + 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
/* The first thing in any (sub) pattern is a bracket of some sort. Push all /* The first thing in any (sub) pattern is a bracket of some sort. Push all
the alternative states onto the list, and find out where the end is. This the alternative states onto the list, and find out where the end is. This
makes it possible to use this function recursively, when we want to stop at a makes it possible to use this function recursively, when we want to stop at a
matching internal ket rather than at the end. matching internal ket rather than at the end.
If the first opcode in the first alternative is OP_REVERSE, we are dealing with If we are dealing with a backward assertion we have to find out the maximum
a backward assertion. In that case, we have to find out the maximum amount to amount to move back, and set up each alternative appropriately. */
move back, and set up each alternative appropriately. */
if (*first_op == OP_REVERSE) if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
{ {
size_t max_back = 0; size_t max_back = 0;
size_t gone_back; size_t gone_back;
@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE)
if (current_subject < mb->start_used_ptr) if (current_subject < mb->start_used_ptr)
mb->start_used_ptr = current_subject; mb->start_used_ptr = current_subject;
/* Now we can process the individual branches. */ /* Now we can process the individual branches. There will be an OP_REVERSE at
the start of each branch, except when the length of the branch is zero. */
end_code = this_start_code; end_code = this_start_code;
do do
{ {
size_t back = (size_t)GET(end_code, 2+LINK_SIZE); uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
if (back <= gone_back) if (back <= gone_back)
{ {
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE); int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back)); ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
} }
end_code += GET(end_code, 1); end_code += GET(end_code, 1);
@ -544,7 +536,7 @@ for (;;)
BOOL partial_newline = FALSE; BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue; BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE; reset_could_continue = FALSE;
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
/* Make the new state list into the active state list and empty the /* Make the new state list into the active state list and empty the
@ -597,7 +589,7 @@ for (;;)
int state_offset = current_state->offset; int state_offset = current_state->offset;
int rrc; int rrc;
int count; int count;
/* A negative offset is a special case meaning "hold off going to this /* A negative offset is a special case meaning "hold off going to this
(negated) state until the number of characters in the data field have (negated) state until the number of characters in the data field have
been skipped". If the could_continue flag was passed over from a previous been skipped". If the could_continue flag was passed over from a previous
@ -633,7 +625,7 @@ for (;;)
code = start_code + state_offset; code = start_code + state_offset;
codevalue = *code; codevalue = *code;
/* If this opcode inspects a character, but we are at the end of the /* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */ subject, remember the fact for use when testing for a partial match. */
@ -3078,13 +3070,13 @@ for (;;)
ptr += clen; /* Advance to next subject character */ ptr += clen; /* Advance to next subject character */
} /* Loop to move along the subject string */ } /* Loop to move along the subject string */
/* Control gets here from "break" a few lines above. If we have a match and /* Control gets here from "break" a few lines above. If we have a match and
PCRE2_ENDANCHORED is set, the match fails. */ PCRE2_ENDANCHORED is set, the match fails. */
if (match_count >= 0 && if (match_count >= 0 &&
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 && ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0 &&
ptr < end_subject) ptr < end_subject)
match_count = PCRE2_ERROR_NOMATCH; match_count = PCRE2_ERROR_NOMATCH;
return match_count; return match_count;
} }
@ -3157,12 +3149,12 @@ if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
time. */ time. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
return PCRE2_ERROR_BADOPTION; return PCRE2_ERROR_BADOPTION;
/* Check that the first field in the block is the magic number. If it is not, /* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */ return with PCRE2_ERROR_BADMAGIC. */

12
testdata/testinput6 vendored
View File

@ -4929,4 +4929,16 @@
\= Expect no match \= Expect no match
X\x00Y X\x00Y
/(?<=abc|)/
abcde\=aftertext
/(?<=|abc)/
abcde\=aftertext
/(?<=abc|)/endanchored
abcde\=aftertext
/(?<=|abc)/endanchored
abcde\=aftertext
# End of testinput6 # End of testinput6

20
testdata/testoutput6 vendored
View File

@ -7746,4 +7746,24 @@ No match
X\x00Y X\x00Y
No match No match
/(?<=abc|)/
abcde\=aftertext
0:
0+ abcde
/(?<=|abc)/
abcde\=aftertext
0:
0+ abcde
/(?<=abc|)/endanchored
abcde\=aftertext
0:
0+
/(?<=|abc)/endanchored
abcde\=aftertext
0:
0+
# End of testinput6 # End of testinput6