From f123833bdb99b2a53e15ed3345624afe0d1e7791 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel" <Philip.Hazel@gmail.com>
Date: Wed, 8 Apr 2015 16:53:22 +0000
Subject: [PATCH] Fix other cases where backtracking after \C could cause a
 crash.

---
 ChangeLog            |  7 ++++---
 src/pcre2_match.c    | 30 +++++++++++++++++++++++++-----
 testdata/testinput4  |  3 +++
 testdata/testoutput4 |  4 ++++
 4 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ef41e9b..37977cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -74,9 +74,10 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
 the code there did catch the loop.
 
 19. If a greedy quantified \X was preceded by \C in UTF mode (e.g. \C\X*), 
-and a subsequent item in the pattern caused a non-match, backtracking over the 
-repeated \X did not stop, but carried on past the start of the subject, causing 
-reference to random memory and/or a segfault.  This bug was discovered by the
+and a subsequent item in the pattern caused a non-match, backtracking over the
+repeated \X did not stop, but carried on past the start of the subject, causing
+reference to random memory and/or a segfault. There were also some other cases
+where backtracking after \C could crash. This set of bugs was discovered by the
 LLVM fuzzer.
 
 
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 3ff0b63..6719e40 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -3576,9 +3576,13 @@ for (;;)
             }
 
           if (possessive) continue;    /* No backtracking */
+
+          /* After \C in UTF mode, pp might be in the middle of a Unicode
+          character. Use <= pp to ensure backtracking doesn't go too far. */
+
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM23);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -3973,9 +3977,13 @@ for (;;)
             eptr += len;
             }
           if (possessive) continue;    /* No backtracking */
+
+          /* After \C in UTF mode, pp might be in the middle of a Unicode
+          character. Use <= pp to ensure backtracking doesn't go too far. */
+
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM30);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -4108,9 +4116,13 @@ for (;;)
             eptr += len;
             }
           if (possessive) continue;    /* No backtracking */
+
+          /* After \C in UTF mode, pp might be in the middle of a Unicode
+          character. Use <= pp to ensure backtracking doesn't go too far. */
+
           for(;;)
             {
-            if (eptr == pp) goto TAIL_RECURSE;
+            if (eptr <= pp) goto TAIL_RECURSE;
             RMATCH(eptr, ecode, offset_top, mb, eptrb, RM34);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
             eptr--;
@@ -5679,9 +5691,13 @@ for (;;)
         /* eptr is now past the end of the maximum run */
 
         if (possessive) continue;    /* No backtracking */
+
+        /* After \C in UTF mode, pp might be in the middle of a Unicode
+        character. Use <= pp to ensure backtracking doesn't go too far. */
+
         for(;;)
           {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM44);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
@@ -5999,9 +6015,13 @@ for (;;)
           }
 
         if (possessive) continue;    /* No backtracking */
+
+        /* After \C in UTF mode, pp might be in the middle of a Unicode
+        character. Use <= pp to ensure backtracking doesn't go too far. */
+
         for(;;)
           {
-          if (eptr == pp) goto TAIL_RECURSE;
+          if (eptr <= pp) goto TAIL_RECURSE;
           RMATCH(eptr, ecode, offset_top, mb, eptrb, RM46);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           eptr--;
diff --git a/testdata/testinput4 b/testdata/testinput4
index 8e51fea..421b3fb 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2226,4 +2226,7 @@
 /utf
     Ӆ\x0a
 
+/\C(\W?ſ)'?{{/utf
+    \\C(\\W?ſ)'?{{
+
 # End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 39924bf..e8090a9 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -3747,4 +3747,8 @@ No match
     Ӆ\x0a
 No match
 
+/\C(\W?ſ)'?{{/utf
+    \\C(\\W?ſ)'?{{
+No match
+
 # End of testinput4