Fix Bugzilla #2642: no-match bug in 8-bit mode for caseless invalid UTF matching.
2020-09-15 14:36:23 +00:00 · 2020-09-15 14:36:23 +00:00 · f8cbb1f58d
parent 0cf247f558
commit f8cbb1f58d
4 changed files with 22 additions and 2 deletions
--- a/7
+++ b/7
@@ -66,6 +66,13 @@ this case have been moved from test 1 to test 2.
 12. Further to 10 above, pcre2test has been updated to detect and grumble if a 
 delimiter other than / is used after #perltest.

+13. Fixed a bug with PCRE2_MATCH_INVALID_UTF in 8-bit mode when PCRE2_CASELESS 
+was set and PCRE2_NO_START_OPTIMIZE was not set. The optimization for finding 
+the start of a match was not resetting correctly after a failed match on the 
+first valid fragment of the subject, possibly causing incorrect "no match" 
+returns on subsequent fragments. For example, with those options the pattern
+/A/ failed to match the subject \xe5A. Fixes Bugzilla #2642.
+

 Version 10.35 09-May-2020
 ---------------------------
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6115,8 +6115,8 @@ BOOL has_req_cu = FALSE;
 BOOL startline;

 #if PCRE2_CODE_UNIT_WIDTH == 8
-BOOL memchr_not_found_first_cu = FALSE;
-BOOL memchr_not_found_first_cu2 = FALSE;
+BOOL memchr_not_found_first_cu;
+BOOL memchr_not_found_first_cu2;
 #endif

 PCRE2_UCHAR first_cu = 0;
@@ -6709,6 +6709,11 @@ FRAGMENT_RESTART:
 start_partial = match_partial = NULL;
 mb->hitend = FALSE;

+#if PCRE2_CODE_UNIT_WIDTH == 8
+memchr_not_found_first_cu = FALSE;
+memchr_not_found_first_cu2 = FALSE;
+#endif
+
 for(;;)
  {
  PCRE2_SPTR new_start_match;
@@ -7187,6 +7192,7 @@ if (utf && end_subject != true_end_subject &&
    starting code units in 8-bit and 16-bit modes. */

    start_match = end_subject + 1;
+    
 #if PCRE2_CODE_UNIT_WIDTH != 32
    while (start_match < true_end_subject && NOT_FIRSTCU(*start_match))
      start_match++;
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -610,4 +610,7 @@
 /X(\x{e1})Y/replace=>\U$1<,substitute_extended
    X\x{e1}Y

+/A/utf,match_invalid_utf,caseless
+    \xe5A
+
 # End of testinput10
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1871,4 +1871,8 @@ Subject length lower bound = 1
    X\x{e1}Y
 1: >\xe1<

+/A/utf,match_invalid_utf,caseless
+    \xe5A
+ 0: A
+
 # End of testinput10