Fix dodgy code for UTF-32 checking.

2015-11-01 16:54:17 +00:00 · 2015-11-01 16:54:17 +00:00 · aaa23388c7
parent a0d04b9fa3
commit aaa23388c7
2 changed files with 13 additions and 3 deletions
--- a/4
+++ b/4
@@ -248,6 +248,10 @@ given.
 71. In pcre2_substitute() there was the possibility of reading one code unit 
 beyond the end of the replacement string.

+72. The code for checking a subject's UTF-32 validity for a pattern with a
+lookbehind involved an out-of-bounds pointer, which could potentially cause 
+trouble in some environments.
+

 Version 10.20 30-June-2015
 --------------------------
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6566,9 +6566,15 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
        check_subject--;
      }
-#else   /* In the 32-bit library, one code unit equals one character. */
-    check_subject -= re->max_lookbehind;
-    if (check_subject < subject) check_subject = subject;
+#else
+    /* In the 32-bit library, one code unit equals one character. However,
+    we cannot just subtract the lookbehind and then compare pointers, because
+    a very large lookbehind could create an invalid pointer. */
+
+    if (start_offset >= re->max_lookbehind)
+      check_subject -= re->max_lookbehind;
+    else
+      check_subject = subject;
 #endif  /* PCRE2_CODE_UNIT_WIDTH != 32 */
    }