Fix bad lookup in global table for wide characters in extended mode and in
*VERB names.
This commit is contained in:
parent
56084cc922
commit
8eae402315
|
@ -49,6 +49,10 @@ Some bugs in the refactored code were subsequently fixed before release:
|
||||||
in characters instead of code units, which could lead to bad code being
|
in characters instead of code units, which could lead to bad code being
|
||||||
compiled, leading to unpredictable behaviour.
|
compiled, leading to unpredictable behaviour.
|
||||||
|
|
||||||
|
(c) In extended /x mode, characters whose code was greater than 255 caused
|
||||||
|
a lookup outside one of the global tables. A similar bug existed for wide
|
||||||
|
characters in *VERB names.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
reference is by name, there is only one group of that name. The referenced
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
|
|
@ -2226,7 +2226,9 @@ while (ptr < ptrend)
|
||||||
and \E and escaped characters are allowed (no character types such as \d). If
|
and \E and escaped characters are allowed (no character types such as \d). If
|
||||||
PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do
|
PCRE2_EXTENDED is also set, we must ignore white space and # comments. Do
|
||||||
this by not entering the special (*VERB:NAME) processing - they are then
|
this by not entering the special (*VERB:NAME) processing - they are then
|
||||||
picked up below. */
|
picked up below. Note that c is a character, not a code unit, so we must not
|
||||||
|
use MAX_255 to test its size because MAX_255 tests code units and is assumed
|
||||||
|
TRUE in 8-bit mode. */
|
||||||
|
|
||||||
if (inverbname &&
|
if (inverbname &&
|
||||||
(
|
(
|
||||||
|
@ -2234,7 +2236,7 @@ while (ptr < ptrend)
|
||||||
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
|
((options & (PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) !=
|
||||||
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
|
(PCRE2_EXTENDED | PCRE2_ALT_VERBNAMES)) ||
|
||||||
/* OR: character > 255 */
|
/* OR: character > 255 */
|
||||||
!MAX_255(c) ||
|
c > 255 ||
|
||||||
/* OR: not a # comment or white space */
|
/* OR: not a # comment or white space */
|
||||||
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
|
(c != CHAR_NUMBER_SIGN && (cb->ctypes[c] & ctype_space) == 0)
|
||||||
))
|
))
|
||||||
|
@ -2306,11 +2308,13 @@ while (ptr < ptrend)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip over whitespace and # comments in extended mode. */
|
/* Skip over whitespace and # comments in extended mode. Note that c is a
|
||||||
|
character, not a code unit, so we must not use MAX_255 to test its size
|
||||||
|
because MAX_255 tests code units and is assumed TRUE in 8-bit mode. */
|
||||||
|
|
||||||
if ((options & PCRE2_EXTENDED) != 0)
|
if ((options & PCRE2_EXTENDED) != 0)
|
||||||
{
|
{
|
||||||
if (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) continue;
|
if (c < 256 && (cb->ctypes[c] & ctype_space) != 0) continue;
|
||||||
if (c == CHAR_NUMBER_SIGN)
|
if (c == CHAR_NUMBER_SIGN)
|
||||||
{
|
{
|
||||||
while (ptr < ptrend)
|
while (ptr < ptrend)
|
||||||
|
|
|
@ -200,11 +200,11 @@ arithmetic results in a signed value. Hence the cast. */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||||
whether its argument is less than 256. The maximum length of a MARK name must
|
whether its argument, which is assumed to be one code unit, is less than 256.
|
||||||
fit in one code unit; currently it is set to 255 or 65535. The TABLE_GET macro
|
The maximum length of a MARK name must fit in one code unit; currently it is
|
||||||
is used to access elements of tables containing exactly 256 items. When code
|
set to 255 or 65535. The TABLE_GET macro is used to access elements of tables
|
||||||
points can be greater than 255, a check is needed before accessing these
|
containing exactly 256 items. When code points can be greater than 255, a check
|
||||||
tables. */
|
is needed before accessing these tables. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
#define MAX_255(c) TRUE
|
#define MAX_255(c) TRUE
|
||||||
|
|
|
@ -1738,4 +1738,8 @@
|
||||||
/../utf,auto_callout
|
/../utf,auto_callout
|
||||||
\n\x{123}\x{123}\x{123}\x{123}
|
\n\x{123}\x{123}\x{123}\x{123}
|
||||||
|
|
||||||
|
# This tests processing wide characters in extended mode.
|
||||||
|
|
||||||
|
/XȀ/x,utf
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -4182,4 +4182,8 @@ Failed: error 125 at offset 2: lookbehind assertion is not fixed length
|
||||||
+2 ^ ^
|
+2 ^ ^
|
||||||
0: \x{123}\x{123}
|
0: \x{123}\x{123}
|
||||||
|
|
||||||
|
# This tests processing wide characters in extended mode.
|
||||||
|
|
||||||
|
/XȀ/x,utf
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
Loading…
Reference in New Issue