From f123833bdb99b2a53e15ed3345624afe0d1e7791 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 8 Apr 2015 16:53:22 +0000 Subject: [PATCH] Fix other cases where backtracking after \C could cause a crash. --- ChangeLog | 7 ++++--- src/pcre2_match.c | 30 +++++++++++++++++++++++++----- testdata/testinput4 | 3 +++ testdata/testoutput4 | 4 ++++ 4 files changed, 36 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index ef41e9b..37977cd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -74,9 +74,10 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer. the code there did catch the loop. 19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*), -and a subsequent item in the pattern caused a non-match, backtracking over the -repeated \X did not stop, but carried on past the start of the subject, causing -reference to random memory and/or a segfault. This bug was discovered by the +and a subsequent item in the pattern caused a non-match, backtracking over the +repeated \X did not stop, but carried on past the start of the subject, causing +reference to random memory and/or a segfault. There were also some other cases +where backtracking after \C could crash. This set of bugs was discovered by the LLVM fuzzer. diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 3ff0b63..6719e40 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -3576,9 +3576,13 @@ for (;;) } if (possessive) continue; /* No backtracking */ + + /* After \C in UTF mode, pp might be in the middle of a Unicode + character. Use <= pp to ensure backtracking doesn't go too far. */ + for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -3973,9 +3977,13 @@ for (;;) eptr += len; } if (possessive) continue; /* No backtracking */ + + /* After \C in UTF mode, pp might be in the middle of a Unicode + character. Use <= pp to ensure backtracking doesn't go too far. */ + for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -4108,9 +4116,13 @@ for (;;) eptr += len; } if (possessive) continue; /* No backtracking */ + + /* After \C in UTF mode, pp might be in the middle of a Unicode + character. Use <= pp to ensure backtracking doesn't go too far. */ + for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -5679,9 +5691,13 @@ for (;;) /* eptr is now past the end of the maximum run */ if (possessive) continue; /* No backtracking */ + + /* After \C in UTF mode, pp might be in the middle of a Unicode + character. Use <= pp to ensure backtracking doesn't go too far. */ + for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; @@ -5999,9 +6015,13 @@ for (;;) } if (possessive) continue; /* No backtracking */ + + /* After \C in UTF mode, pp might be in the middle of a Unicode + character. Use <= pp to ensure backtracking doesn't go too far. */ + for(;;) { - if (eptr == pp) goto TAIL_RECURSE; + if (eptr <= pp) goto TAIL_RECURSE; RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46); if (rrc != MATCH_NOMATCH) RRETURN(rrc); eptr--; diff --git a/testdata/testinput4 b/testdata/testinput4 index 8e51fea..421b3fb 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -2226,4 +2226,7 @@ /utf Ӆ\x0a +/\C(\W?ſ)'?{{/utf + \\C(\\W?ſ)'?{{ + # End of testinput4 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 39924bf..e8090a9 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -3747,4 +3747,8 @@ No match Ӆ\x0a No match +/\C(\W?ſ)'?{{/utf + \\C(\\W?ſ)'?{{ +No match + # End of testinput4