Fix sometimes failing caseless non-ASCII matching in assertion.
This commit is contained in:
parent
6f41a5a01a
commit
3c869816ac
|
@ -177,6 +177,11 @@ sanitizer complaint (regexec is supposed to be thread safe).
|
|||
37. Add NEON vectorization to JIT to speed up matching of first character and
|
||||
pairs of characters on ARM64 CPUs.
|
||||
|
||||
38. If a non-ASCII character was the first in a starting assertion in a
|
||||
caseless match, the "first code unit" optimization did not get the casing
|
||||
right, and the assertion failed to match a character in the other case if it
|
||||
did not start with the same code unit as the character in the pattern.
|
||||
|
||||
|
||||
Version 10.33 16-April-2019
|
||||
---------------------------
|
||||
|
|
|
@ -8741,6 +8741,19 @@ do {
|
|||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
if (inassert == 0) return 0;
|
||||
|
||||
/* If the character is more than one code unit long, we cannot set its
|
||||
first code unit when matching caselessly. Later scanning may pick up
|
||||
multiple code units. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (scode[1] >= 0x80) return 0;
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||
else if (c != scode[1]) return 0;
|
||||
break;
|
||||
|
|
|
@ -2483,4 +2483,12 @@
|
|||
/\X*/
|
||||
\xF3aaa\xE4\xEA\xEB\xFEa
|
||||
|
||||
/Я/i,utf
|
||||
\x{42f}
|
||||
\x{44f}
|
||||
|
||||
/(?=Я)/i,utf
|
||||
\x{42f}
|
||||
\x{44f}
|
||||
|
||||
# End of testinput4
|
||||
|
|
|
@ -4016,4 +4016,16 @@ No match
|
|||
\xF3aaa\xE4\xEA\xEB\xFEa
|
||||
0: \xf3aaa\xe4\xea\xeb\xfea
|
||||
|
||||
/Я/i,utf
|
||||
\x{42f}
|
||||
0: \x{42f}
|
||||
\x{44f}
|
||||
0: \x{44f}
|
||||
|
||||
/(?=Я)/i,utf
|
||||
\x{42f}
|
||||
0:
|
||||
\x{44f}
|
||||
0:
|
||||
|
||||
# End of testinput4
|
||||
|
|
Loading…
Reference in New Issue