Fix lookbehind with zero-length branch in DFA matching. Fixes oss-fuzz 1859.

This commit is contained in:
Philip.Hazel 2017-05-27 15:49:29 +00:00
parent 3d80fa4fc2
commit 3f96e07e6b
4 changed files with 53 additions and 26 deletions

View File

@ -171,6 +171,9 @@ one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
34. Implement newline type PCRE2_NEWLINE_NUL.
35. A lookbehind assertion that had a zero-length branch caused undefined
behaviour when processed by pcre2_dfa_match(). This is oss-fuzz issue 1859.
Version 10.23 14-February-2017
------------------------------

View File

@ -375,14 +375,10 @@ internal_dfa_match(
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr;
PCRE2_SPTR end_code;
PCRE2_SPTR first_op;
dfa_recursion_info new_recursive;
int active_count, new_count, match_count;
/* Some fields in the mb block are frequently referenced, so we load them into
@ -417,21 +413,15 @@ active_states = (stateblock *)(workspace + 2);
next_new_state = new_states = active_states + wscount;
new_count = 0;
first_op = this_start_code + 1 + LINK_SIZE +
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
? IMM2_SIZE:0);
/* The first thing in any (sub) pattern is a bracket of some sort. Push all
the alternative states onto the list, and find out where the end is. This
makes it possible to use this function recursively, when we want to stop at a
matching internal ket rather than at the end.
If the first opcode in the first alternative is OP_REVERSE, we are dealing with
a backward assertion. In that case, we have to find out the maximum amount to
move back, and set up each alternative appropriately. */
If we are dealing with a backward assertion we have to find out the maximum
amount to move back, and set up each alternative appropriately. */
if (*first_op == OP_REVERSE)
if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
{
size_t max_back = 0;
size_t gone_back;
@ -476,15 +466,17 @@ if (*first_op == OP_REVERSE)
if (current_subject < mb->start_used_ptr)
mb->start_used_ptr = current_subject;
/* Now we can process the individual branches. */
/* Now we can process the individual branches. There will be an OP_REVERSE at
the start of each branch, except when the length of the branch is zero. */
end_code = this_start_code;
do
{
size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0;
size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE);
if (back <= gone_back)
{
int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen);
ADD_NEW_DATA(-bstate, 0, (int)(gone_back - back));
}
end_code += GET(end_code, 1);

12
testdata/testinput6 vendored
View File

@ -4929,4 +4929,16 @@
\= Expect no match
X\x00Y
/(?<=abc|)/
abcde\=aftertext
/(?<=|abc)/
abcde\=aftertext
/(?<=abc|)/endanchored
abcde\=aftertext
/(?<=|abc)/endanchored
abcde\=aftertext
# End of testinput6

20
testdata/testoutput6 vendored
View File

@ -7746,4 +7746,24 @@ No match
X\x00Y
No match
/(?<=abc|)/
abcde\=aftertext
0:
0+ abcde
/(?<=|abc)/
abcde\=aftertext
0:
0+ abcde
/(?<=abc|)/endanchored
abcde\=aftertext
0:
0+
/(?<=|abc)/endanchored
abcde\=aftertext
0:
0+
# End of testinput6