Fix sometimes failing caseless non-ASCII matching in assertion.

This commit is contained in:
Philip.Hazel 2019-11-16 17:30:07 +00:00
parent 6f41a5a01a
commit 3c869816ac
4 changed files with 38 additions and 0 deletions

View File

@ -177,6 +177,11 @@ sanitizer complaint (regexec is supposed to be thread safe).
37. Add NEON vectorization to JIT to speed up matching of first character and
pairs of characters on ARM64 CPUs.
38. If a non-ASCII character was the first item in a starting assertion in a
caseless match, the "first code unit" optimization did not get the casing
right, and the assertion failed to match the other-case form of the character
when that form did not start with the same code unit.
Version 10.33 16-April-2019
---------------------------

View File

@ -8741,6 +8741,19 @@ do {
case OP_MINPLUSI: case OP_MINPLUSI:
case OP_POSPLUSI: case OP_POSPLUSI:
if (inassert == 0) return 0; if (inassert == 0) return 0;
/* If the character is more than one code unit long, we cannot set its
first code unit when matching caselessly. Later scanning may pick up
multiple code units. */
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (scode[1] >= 0x80) return 0;
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (scode[1] >= 0xd800 && scode[1] <= 0xdfff) return 0;
#endif
#endif
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0; else if (c != scode[1]) return 0;
break; break;

8
testdata/testinput4 vendored
View File

@ -2483,4 +2483,12 @@
/\X*/ /\X*/
\xF3aaa\xE4\xEA\xEB\xFEa \xF3aaa\xE4\xEA\xEB\xFEa
/Я/i,utf
\x{42f}
\x{44f}
/(?=Я)/i,utf
\x{42f}
\x{44f}
# End of testinput4 # End of testinput4

12
testdata/testoutput4 vendored
View File

@ -4016,4 +4016,16 @@ No match
\xF3aaa\xE4\xEA\xEB\xFEa \xF3aaa\xE4\xEA\xEB\xFEa
0: \xf3aaa\xe4\xea\xeb\xfea 0: \xf3aaa\xe4\xea\xeb\xfea
/Я/i,utf
\x{42f}
0: \x{42f}
\x{44f}
0: \x{44f}
/(?=Я)/i,utf
\x{42f}
0:
\x{44f}
0:
# End of testinput4 # End of testinput4