Fix single-character POSIX class bug in UCP mode.

This commit is contained in:
Philip.Hazel 2015-11-17 17:13:43 +00:00
parent c0d0f2f65e
commit 6650a2fd9a
6 changed files with 74 additions and 11 deletions

View File

@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?).
94. Support offset_limit in JIT. 94. Support offset_limit in JIT.
95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
by a single ASCII character in a class item, was incorrectly compiled in UCP
mode. The POSIX class got lost, but only if the single character followed it.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -1352,7 +1352,7 @@ if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 &&
/* A large and/or complex regex can take too long to process. We have to assume /* A large and/or complex regex can take too long to process. We have to assume
it can match an empty string. This can happen more often when (?| groups are it can match an empty string. This can happen more often when (?| groups are
present in the pattern and the caching is disabled. Setting the cap at 1100 present in the pattern and the caching is disabled. Setting the cap at 1100
allows the test for more than 1023 capturing patterns to work. */ allows the test for more than 1023 capturing patterns to work. */
if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED; if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
@ -4729,16 +4729,20 @@ for (;; ptr++)
CLASS_SINGLE_CHARACTER: CLASS_SINGLE_CHARACTER:
if (class_one_char < 2) class_one_char++; if (class_one_char < 2) class_one_char++;
/* If class_one_char is 1, we have the first single character in the /* If class_one_char is 1 and xclass_has_prop is false, we have the first
class, and there have been no prior ranges, or XCLASS items generated by single character in the class, and there have been no prior ranges, or
escapes. If this is the final character in the class, we can optimize by XCLASS items generated by escapes. If this is the final character in the
turning the item into a 1-character OP_CHAR[I] if it's positive, or class, we can optimize by turning the item into a 1-character OP_CHAR[I]
OP_NOT[I] if it's negative. In the positive case, it can cause firstcu if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
to be set. Otherwise, there can be no first char if this item is first, can cause firstcu to be set. Otherwise, there can be no first char if
whatever repeat count may follow. In the case of reqcu, save the this item is first, whatever repeat count may follow. In the case of
previous value for reinstating. */ reqcu, save the previous value for reinstating. */
if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) if (!inescq &&
#ifdef SUPPORT_UNICODE
!xclass_has_prop &&
#endif
class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{ {
ptr++; ptr++;
zeroreqcu = reqcu; zeroreqcu = reqcu;
@ -7287,7 +7291,7 @@ for (;; ptr++)
else else
{ {
if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
cb->max_lookbehind == 0) cb->max_lookbehind == 0)
cb->max_lookbehind = 1; cb->max_lookbehind = 1;

4
testdata/testinput2 vendored
View File

@ -4685,4 +4685,8 @@ a)"xI
"(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN "(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
/a[[:punct:]b]/bincode
/a[b[:punct:]]/bincode
# End of testinput2 # End of testinput2

6
testdata/testinput5 vendored
View File

@ -1691,4 +1691,10 @@
/abc\Cdef/info,utf /abc\Cdef/info,utf
/a[[:punct:]b]/ucp,bincode
/a[[:punct:]b]/utf,ucp,bincode
/a[b[:punct:]]/utf,ucp,bincode
# End of testinput5 # End of testinput5

18
testdata/testoutput2 vendored
View File

@ -14888,4 +14888,22 @@ Subject length lower bound = 0
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20 4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20
/a[[:punct:]b]/bincode
------------------------------------------------------------------
Bra
a
[!-/:-@[-`b{-~]
Ket
End
------------------------------------------------------------------
/a[b[:punct:]]/bincode
------------------------------------------------------------------
Bra
a
[!-/:-@[-`b{-~]
Ket
End
------------------------------------------------------------------
# End of testinput2 # End of testinput2

27
testdata/testoutput5 vendored
View File

@ -4070,4 +4070,31 @@ First code unit = 'a'
Last code unit = 'f' Last code unit = 'f'
Subject length lower bound = 0 Subject length lower bound = 0
/a[[:punct:]b]/ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
/a[[:punct:]b]/utf,ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
/a[b[:punct:]]/utf,ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
# End of testinput5 # End of testinput5