Fix other cases where backtracking after \C could cause a crash.

This commit is contained in:
Philip.Hazel 2015-04-08 16:53:22 +00:00
parent aa8ee3ded5
commit f123833bdb
4 changed files with 36 additions and 8 deletions

View File

@ -76,7 +76,8 @@ the code there did catch the loop.
19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*),
and a subsequent item in the pattern caused a non-match, backtracking over the
repeated \X did not stop, but carried on past the start of the subject, causing
reference to random memory and/or a segfault. There were also some other cases
where backtracking after \C could crash. This set of bugs was discovered by the
LLVM fuzzer.

View File

@ -3576,9 +3576,13 @@ for (;;)
} }
if (possessive) continue; /* No backtracking */ if (possessive) continue; /* No backtracking */
/* After \C in UTF mode, pp might be in the middle of a Unicode
character. Use <= pp to ensure backtracking doesn't go too far. */
for(;;) for(;;)
{ {
if (eptr == pp) goto TAIL_RECURSE; if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--; eptr--;
@ -3973,9 +3977,13 @@ for (;;)
eptr += len; eptr += len;
} }
if (possessive) continue; /* No backtracking */ if (possessive) continue; /* No backtracking */
/* After \C in UTF mode, pp might be in the middle of a Unicode
character. Use <= pp to ensure backtracking doesn't go too far. */
for(;;) for(;;)
{ {
if (eptr == pp) goto TAIL_RECURSE; if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--; eptr--;
@ -4108,9 +4116,13 @@ for (;;)
eptr += len; eptr += len;
} }
if (possessive) continue; /* No backtracking */ if (possessive) continue; /* No backtracking */
/* After \C in UTF mode, pp might be in the middle of a Unicode
character. Use <= pp to ensure backtracking doesn't go too far. */
for(;;) for(;;)
{ {
if (eptr == pp) goto TAIL_RECURSE; if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--; eptr--;
@ -5679,9 +5691,13 @@ for (;;)
/* eptr is now past the end of the maximum run */ /* eptr is now past the end of the maximum run */
if (possessive) continue; /* No backtracking */ if (possessive) continue; /* No backtracking */
/* After \C in UTF mode, pp might be in the middle of a Unicode
character. Use <= pp to ensure backtracking doesn't go too far. */
for(;;) for(;;)
{ {
if (eptr == pp) goto TAIL_RECURSE; if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--; eptr--;
@ -5999,9 +6015,13 @@ for (;;)
} }
if (possessive) continue; /* No backtracking */ if (possessive) continue; /* No backtracking */
/* After \C in UTF mode, pp might be in the middle of a Unicode
character. Use <= pp to ensure backtracking doesn't go too far. */
for(;;) for(;;)
{ {
if (eptr == pp) goto TAIL_RECURSE; if (eptr <= pp) goto TAIL_RECURSE;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
eptr--; eptr--;

3
testdata/testinput4 vendored
View File

@ -2226,4 +2226,7 @@
/utf /utf
Ӆ\x0a Ӆ\x0a
/\C(\W?ſ)'?{{/utf
\\C(\\W?ſ)'?{{
# End of testinput4 # End of testinput4

View File

@ -3747,4 +3747,8 @@ No match
Ӆ\x0a Ӆ\x0a
No match No match
/\C(\W?ſ)'?{{/utf
\\C(\\W?ſ)'?{{
No match
# End of testinput4 # End of testinput4