diff --git a/ChangeLog b/ChangeLog index 7ca7f24..5933f06 100644 --- a/ChangeLog +++ b/ChangeLog @@ -48,6 +48,10 @@ Some bugs in the refactored code were subsequently fixed before release: (b) In utf mode, the length of a *MARK (or other verb) name was being checked in characters instead of code units, which could lead to bad code being compiled, leading to unpredictable behaviour. + + (c) In extended /x mode, characters whose code was greater than 255 caused + a lookup outside one of the global tables. A similar bug existed for wide + characters in *VERB names. 4. Back references are now permitted in lookbehind assertions when there are no duplicated group numbers (that is, (?| has not been used), and, if the diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index a76ca0f..b7df32d 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2226,7 +2226,9 @@ while (ptr < ptrend) and \E and escaped characters are allowed (no character types such as \d). If PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do this by not entering the special (*VERB:NAME) processing - they are then - picked up below. */ + picked up below. Note that c is a character, not a code unit, so we must not + use MAX_255 to test its size because MAX_255 tests code units and is assumed + TRUE in 8-bit mode. */ if (inverbname && ( @@ -2234,7 +2236,7 @@ while (ptr < ptrend) ((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) != (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) || /* OR: character > 255 */ - !MAX_255(c) || + c > 255 || /* OR: not a # comment or white space */ (c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0) )) @@ -2306,11 +2308,13 @@ while (ptr < ptrend) } } - /* Skip over whitespace and # comments in extended mode. */ + /* Skip over whitespace and # comments in extended mode. Note that c is a + character, not a code unit, so we must not use MAX_255 to test its size + because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */ if ((options & PCRE2_EXTENDED) != 0) { - if (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) continue; + if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue; if (c == CHAR_NUMBER_SIGN) { while (ptr < ptrend) @@ -8866,7 +8870,7 @@ if (pattern == NULL) *errorptr = ERR16; return NULL; } - + /* Check that all undefined public option bits are zero. */ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index d062144..0579bd6 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -200,11 +200,11 @@ arithmetic results in a signed value. Hence the cast. */ #endif /* Other macros that are different for 8-bit mode. The MAX_255 macro checks -whether its argument is less than 256. The maximum length of a MARK name must -fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro -is used to access elements of tables containing exactly 256 items. When code -points can be greater than 255, a check is needed before accessing these -tables. */ +whether its argument, which is assumed to be one code unit, is less than 256. +The maximum length of a MARK name must fit in one code unit; currently it is +set to 255 or 65535. The TABLE_GET macro is used to access elements of tables +containing exactly 256 items. When code points can be greater than 255, a check +is needed before accessing these tables. */ #if PCRE2_CODE_UNIT_WIDTH == 8 #define MAX_255(c) TRUE diff --git a/testdata/testinput5 b/testdata/testinput5 index a55fb32..7e6e6a7 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1738,4 +1738,8 @@ /../utf,auto_callout \n\x{123}\x{123}\x{123}\x{123} +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + # End of testinput5 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 30bfb02..bd85613 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4182,4 +4182,8 @@ Failed: error 125 at offset 2: lookbehind assertion is not fixed length +2 ^ ^ 0: \x{123}\x{123} +# This tests processing wide characters in extended mode. + +/XȀ/x,utf + # End of testinput5