From aaa23388c7cd9eb9218da09a3730b1b0770c6f36 Mon Sep 17 00:00:00 2001
From: "Philip.Hazel"
Date: Sun, 1 Nov 2015 16:54:17 +0000
Subject: [PATCH] Fix dodgy code for UTF-32 checking.

---
 ChangeLog         |  4 ++++
 src/pcre2_match.c | 12 +++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f1fed5e..4156dca 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -248,6 +248,10 @@ given.
 71. In pcre2_substitute() there was the possibility of reading one code unit
 beyond the end of the replacement string.
 
+72. The code for checking a subject's UTF-32 validity for a pattern with a
+lookbehind involved an out-of-bounds pointer, which could potentially cause
+trouble in some environments.
+
 
 Version 10.20 30-June-2015
 --------------------------
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ad04a1b..35ee5ae 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6566,9 +6566,15 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
 #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
         check_subject--;
       }
-#else /* In the 32-bit library, one code unit equals one character. */
-    check_subject -= re->max_lookbehind;
-    if (check_subject < subject) check_subject = subject;
+#else
+    /* In the 32-bit library, one code unit equals one character. However,
+    we cannot just subtract the lookbehind and then compare pointers, because
+    a very large lookbehind could create an invalid pointer. */
+
+    if (start_offset >= re->max_lookbehind)
+      check_subject -= re->max_lookbehind;
+    else
+      check_subject = subject;
 #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
     }
 