Fix caseless matching of non-ASCII characters in assertions, which sometimes failed.
This commit is contained in:
parent
6f41a5a01a
commit
3c869816ac
|
@ -177,6 +177,11 @@ sanitizer complaint (regexec is supposed to be thread safe).
|
||||||
37. Add NEON vectorization to JIT to speed up matching of first character and
|
37. Add NEON vectorization to JIT to speed up matching of first character and
|
||||||
pairs of characters on ARM64 CPUs.
|
pairs of characters on ARM64 CPUs.
|
||||||
|
|
||||||
|
38. If a non-ASCII character was the first in a starting assertion in a
|
||||||
|
caseless match, the "first code unit" optimization did not get the casing
|
||||||
|
right, and the assertion failed to match a character in the other case if it
|
||||||
|
did not start with the same code unit.
|
||||||
|
|
||||||
|
|
||||||
Version 10.33 16-April-2019
|
Version 10.33 16-April-2019
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -8741,6 +8741,19 @@ do {
|
||||||
case OP_MINPLUSI:
|
case OP_MINPLUSI:
|
||||||
case OP_POSPLUSI:
|
case OP_POSPLUSI:
|
||||||
if (inassert == 0) return 0;
|
if (inassert == 0) return 0;
|
||||||
|
|
||||||
|
/* If the character is more than one code unit long, we cannot set its
|
||||||
|
first code unit when matching caselessly. Later scanning may pick up
|
||||||
|
multiple code units. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (scode[1] >= 0x80) return 0;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||||
else if (c != scode[1]) return 0;
|
else if (c != scode[1]) return 0;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -2483,4 +2483,12 @@
|
||||||
/\X*/
|
/\X*/
|
||||||
\xF3aaa\xE4\xEA\xEB\xFEa
|
\xF3aaa\xE4\xEA\xEB\xFEa
|
||||||
|
|
||||||
|
/Я/i,utf
|
||||||
|
\x{42f}
|
||||||
|
\x{44f}
|
||||||
|
|
||||||
|
/(?=Я)/i,utf
|
||||||
|
\x{42f}
|
||||||
|
\x{44f}
|
||||||
|
|
||||||
# End of testinput4
|
# End of testinput4
|
||||||
|
|
|
@ -4016,4 +4016,16 @@ No match
|
||||||
\xF3aaa\xE4\xEA\xEB\xFEa
|
\xF3aaa\xE4\xEA\xEB\xFEa
|
||||||
0: \xf3aaa\xe4\xea\xeb\xfea
|
0: \xf3aaa\xe4\xea\xeb\xfea
|
||||||
|
|
||||||
|
/Я/i,utf
|
||||||
|
\x{42f}
|
||||||
|
0: \x{42f}
|
||||||
|
\x{44f}
|
||||||
|
0: \x{44f}
|
||||||
|
|
||||||
|
/(?=Я)/i,utf
|
||||||
|
\x{42f}
|
||||||
|
0:
|
||||||
|
\x{44f}
|
||||||
|
0:
|
||||||
|
|
||||||
# End of testinput4
|
# End of testinput4
|
||||||
|
|
Loading…
Reference in New Issue