Fix single-character POSIX class bug in UCP mode.

This commit is contained in:
Philip.Hazel 2015-11-17 17:13:43 +00:00
parent c0d0f2f65e
commit 6650a2fd9a
6 changed files with 74 additions and 11 deletions

View File

@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?).
94. Support offset_limit in JIT. 94. Support offset_limit in JIT.
95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
by a single ASCII character in a class item, was incorrectly compiled in UCP
mode. The POSIX class got lost, but only if the single character followed it.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -4729,16 +4729,20 @@ for (;; ptr++)
CLASS_SINGLE_CHARACTER: CLASS_SINGLE_CHARACTER:
if (class_one_char < 2) class_one_char++; if (class_one_char < 2) class_one_char++;
/* If class_one_char is 1, we have the first single character in the /* If class_one_char is 1 and xclass_has_prop is false, we have the first
class, and there have been no prior ranges, or XCLASS items generated by single character in the class, and there have been no prior ranges, or
escapes. If this is the final character in the class, we can optimize by XCLASS items generated by escapes. If this is the final character in the
turning the item into a 1-character OP_CHAR[I] if it's positive, or class, we can optimize by turning the item into a 1-character OP_CHAR[I]
OP_NOT[I] if it's negative. In the positive case, it can cause firstcu if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
to be set. Otherwise, there can be no first char if this item is first, can cause firstcu to be set. Otherwise, there can be no first char if
whatever repeat count may follow. In the case of reqcu, save the this item is first, whatever repeat count may follow. In the case of
previous value for reinstating. */ reqcu, save the previous value for reinstating. */
if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) if (!inescq &&
#ifdef SUPPORT_UNICODE
!xclass_has_prop &&
#endif
class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
{ {
ptr++; ptr++;
zeroreqcu = reqcu; zeroreqcu = reqcu;

4
testdata/testinput2 vendored
View File

@ -4685,4 +4685,8 @@ a)"xI
"(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN "(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
/a[[:punct:]b]/bincode
/a[b[:punct:]]/bincode
# End of testinput2 # End of testinput2

6
testdata/testinput5 vendored
View File

@ -1691,4 +1691,10 @@
/abc\Cdef/info,utf /abc\Cdef/info,utf
/a[[:punct:]b]/ucp,bincode
/a[[:punct:]b]/utf,ucp,bincode
/a[b[:punct:]]/utf,ucp,bincode
# End of testinput5 # End of testinput5

18
testdata/testoutput2 vendored
View File

@ -14888,4 +14888,22 @@ Subject length lower bound = 0
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20 4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20
/a[[:punct:]b]/bincode
------------------------------------------------------------------
Bra
a
[!-/:-@[-`b{-~]
Ket
End
------------------------------------------------------------------
/a[b[:punct:]]/bincode
------------------------------------------------------------------
Bra
a
[!-/:-@[-`b{-~]
Ket
End
------------------------------------------------------------------
# End of testinput2 # End of testinput2

27
testdata/testoutput5 vendored
View File

@ -4070,4 +4070,31 @@ First code unit = 'a'
Last code unit = 'f' Last code unit = 'f'
Subject length lower bound = 0 Subject length lower bound = 0
/a[[:punct:]b]/ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
/a[[:punct:]b]/utf,ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
/a[b[:punct:]]/utf,ucp,bincode
------------------------------------------------------------------
Bra
a
[b[:punct:]]
Ket
End
------------------------------------------------------------------
# End of testinput5 # End of testinput5