Fix \C bug with repeated character classes in UTF-8 mode.

2018-02-19 17:26:33 +00:00 · 2018-02-19 17:26:33 +00:00 · b26aa366ba
parent 553bf8a1dc
commit b26aa366ba
6 changed files with 32 additions and 4 deletions
--- a/5
+++ b/5
@ -20,6 +20,11 @@ Unicode newlines" in the default case when --enable-bsr-anycrlf has not been
 specified. Similarly, running "pcre2test -C bsr" never produced the result 
 ANY.
 4. Matching the pattern /(*UTF)\C[^\v]+\x80/ against an 8-bit string containing 
 multi-code-unit characters caused bad behaviour and possibly a crash. This 
 issue was fixed for other kinds of repeat in release 10.20 by change 19, but
 repeating character classes were overlooked.
 Version 10.31 12-February-2018
 ------------------------------
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@ -1962,11 +1962,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
          if (reptype == REPTYPE_POS) continue;    /* No backtracking */
          /* After \C in UTF mode, Lstart_eptr might be in the middle of a
          Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
          go too far. */
          for (;;)
            {
            RMATCH(Fecode, RM201);
            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
+            if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
            BACKCHAR(Feptr);
            }
          }
@ -2126,11 +2130,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
        go too far. */
        for(;;)
          {
          RMATCH(Fecode, RM101);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (Feptr-- == Lstart_eptr) break;  /* Tried at original position */
+          if (Feptr-- <= Lstart_eptr) break;  /* Tried at original position */
 #ifdef SUPPORT_UNICODE
          if (utf) BACKCHAR(Feptr);
 #endif
@ -4002,8 +4010,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
        if (reptype == REPTYPE_POS) continue;    /* No backtracking */
        /* After \C in UTF mode, Lstart_eptr might be in the middle of a
-        Unicode character. Use <= pp to ensure backtracking doesn't go too far.
+        Unicode character. Use <= Lstart_eptr to ensure backtracking doesn't
-        */
+        go too far. */
        for(;;)
          {
--- a/testdata/testinput22
+++ b/testdata/testinput22
@ -98,4 +98,7 @@
 \= Expect no match - tests \C at end of subject
    ab
 /\C[^\v]+\x80/utf
    [AΏBŀC]
 # End of testinput22
--- a/testdata/testoutput22-16
+++ b/testdata/testoutput22-16
@ -171,4 +171,8 @@ No match
    ab
 No match
 /\C[^\v]+\x80/utf
    [AΏBŀC]
 No match
 # End of testinput22
--- a/testdata/testoutput22-32
+++ b/testdata/testoutput22-32
@ -169,4 +169,8 @@ No match
    ab
 No match
 /\C[^\v]+\x80/utf
    [AΏBŀC]
 No match
 # End of testinput22
--- a/testdata/testoutput22-8
+++ b/testdata/testoutput22-8
@ -173,4 +173,8 @@ No match
    ab
 No match
 /\C[^\v]+\x80/utf
    [AΏBŀC]
 No match
 # End of testinput22