Fix bad lookup in global table for wide characters in extended mode and in
*VERB names.
This commit is contained in:
parent
56084cc922
commit
8eae402315
|
@ -49,6 +49,10 @@ Some bugs in the refactored code were subsequently fixed before release:
|
|||
in characters instead of code units, which could lead to bad code being
|
||||
compiled, leading to unpredictable behaviour.
|
||||
|
||||
(c) In extended /x mode, characters whose code was greater than 255 caused
|
||||
a lookup outside one of the global tables. A similar bug existed for wide
|
||||
characters in *VERB names.
|
||||
|
||||
4. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
reference is by name, there is only one group of that name. The referenced
|
||||
|
|
|
@ -2226,7 +2226,9 @@ while (ptr < ptrend)
|
|||
and \E and escaped characters are allowed (no character types such as \d). If
|
||||
PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do
|
||||
this by not entering the special (*VERB:NAME) processing - they are then
|
||||
picked up below. */
|
||||
picked up below. Note that c is a character, not a code unit, so we must not
|
||||
use MAX_255 to test its size because MAX_255 tests code units and is assumed
|
||||
TRUE in 8-bit mode. */
|
||||
|
||||
if (inverbname &&
|
||||
(
|
||||
|
@ -2234,7 +2236,7 @@ while (ptr < ptrend)
|
|||
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
|
||||
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
|
||||
/* OR: character > 255 */
|
||||
!MAX_255(c) ||
|
||||
c > 255 ||
|
||||
/* OR: not a # comment or white space */
|
||||
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
|
||||
))
|
||||
|
@ -2306,11 +2308,13 @@ while (ptr < ptrend)
|
|||
}
|
||||
}
|
||||
|
||||
/* Skip over whitespace and # comments in extended mode. */
|
||||
/* Skip over whitespace and # comments in extended mode. Note that c is a
|
||||
character, not a code unit, so we must not use MAX_255 to test its size
|
||||
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
|
||||
|
||||
if ((options & PCRE2_EXTENDED) != 0)
|
||||
{
|
||||
if (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) continue;
|
||||
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
|
||||
if (c == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
while (ptr < ptrend)
|
||||
|
|
|
@ -200,11 +200,11 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
whether its argument is less than 256. The maximum length of a MARK name must
|
||||
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
|
||||
is used to access elements of tables containing exactly 256 items. When code
|
||||
points can be greater than 255, a check is needed before accessing these
|
||||
tables. */
|
||||
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||
The maximum length of a MARK name must fit in one code unit; currently it is
|
||||
set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
|
||||
containing exactly 256 items. When code points can be greater than 255, a check
|
||||
is needed before accessing these tables. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#define MAX_255(c) TRUE
|
||||
|
|
|
@ -1738,4 +1738,8 @@
|
|||
/../utf,auto_callout
|
||||
\n\x{123}\x{123}\x{123}\x{123}
|
||||
|
||||
# This tests processing wide characters in extended mode.
|
||||
|
||||
/XȀ/x,utf
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -4182,4 +4182,8 @@ Failed: error 125 at offset 2: lookbehind assertion is not fixed length
|
|||
+2 ^ ^
|
||||
0: \x{123}\x{123}
|
||||
|
||||
# This tests processing wide characters in extended mode.
|
||||
|
||||
/XȀ/x,utf
|
||||
|
||||
# End of testinput5
|
||||
|
|
Loading…
Reference in New Issue