Fix single-character POSIX class bug in UCP mode.
This commit is contained in:
parent
c0d0f2f65e
commit
6650a2fd9a
|
@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?).
|
|||
|
||||
94. Support offset_limit in JIT.
|
||||
|
||||
95. A sequence such as [[:punct:]b], that is, a POSIX character class followed
|
||||
by a single ASCII character in a class item, was incorrectly compiled in UCP
|
||||
mode. The POSIX class got lost, but only if the single character followed it.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -1352,7 +1352,7 @@ if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 &&
|
|||
|
||||
/* A large and/or complex regex can take too long to process. We have to assume
|
||||
it can match an empty string. This can happen more often when (?| groups are
|
||||
present in the pattern and the caching is disabled. Setting the cap at 1100
|
||||
present in the pattern and the caching is disabled. Setting the cap at 1100
|
||||
allows the test for more than 1023 capturing patterns to work. */
|
||||
|
||||
if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED;
|
||||
|
@ -4729,16 +4729,20 @@ for (;; ptr++)
|
|||
CLASS_SINGLE_CHARACTER:
|
||||
if (class_one_char < 2) class_one_char++;
|
||||
|
||||
/* If class_one_char is 1, we have the first single character in the
|
||||
class, and there have been no prior ranges, or XCLASS items generated by
|
||||
escapes. If this is the final character in the class, we can optimize by
|
||||
turning the item into a 1-character OP_CHAR[I] if it's positive, or
|
||||
OP_NOT[I] if it's negative. In the positive case, it can cause firstcu
|
||||
to be set. Otherwise, there can be no first char if this item is first,
|
||||
whatever repeat count may follow. In the case of reqcu, save the
|
||||
previous value for reinstating. */
|
||||
/* If class_one_char is 1 and xclass_has_prop is false, we have the first
|
||||
single character in the class, and there have been no prior ranges, or
|
||||
XCLASS items generated by escapes. If this is the final character in the
|
||||
class, we can optimize by turning the item into a 1-character OP_CHAR[I]
|
||||
if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
|
||||
can cause firstcu to be set. Otherwise, there can be no first char if
|
||||
this item is first, whatever repeat count may follow. In the case of
|
||||
reqcu, save the previous value for reinstating. */
|
||||
|
||||
if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
if (!inescq &&
|
||||
#ifdef SUPPORT_UNICODE
|
||||
!xclass_has_prop &&
|
||||
#endif
|
||||
class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
|
||||
{
|
||||
ptr++;
|
||||
zeroreqcu = reqcu;
|
||||
|
@ -7287,7 +7291,7 @@ for (;; ptr++)
|
|||
|
||||
else
|
||||
{
|
||||
if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
|
||||
if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */
|
||||
if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
|
||||
cb->max_lookbehind == 0)
|
||||
cb->max_lookbehind = 1;
|
||||
|
|
|
@ -4685,4 +4685,8 @@ a)"xI
|
|||
"(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN
|
||||
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
|
||||
|
||||
/a[[:punct:]b]/bincode
|
||||
|
||||
/a[b[:punct:]]/bincode
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -1691,4 +1691,10 @@
|
|||
|
||||
/abc\Cdef/info,utf
|
||||
|
||||
/a[[:punct:]b]/ucp,bincode
|
||||
|
||||
/a[[:punct:]b]/utf,ucp,bincode
|
||||
|
||||
/a[b[:punct:]]/utf,ucp,bincode
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -14888,4 +14888,22 @@ Subject length lower bound = 0
|
|||
15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20
|
||||
4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20
|
||||
|
||||
/a[[:punct:]b]/bincode
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
[!-/:-@[-`b{-~]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a[b[:punct:]]/bincode
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
[!-/:-@[-`b{-~]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4070,4 +4070,31 @@ First code unit = 'a'
|
|||
Last code unit = 'f'
|
||||
Subject length lower bound = 0
|
||||
|
||||
/a[[:punct:]b]/ucp,bincode
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
[b[:punct:]]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a[[:punct:]b]/utf,ucp,bincode
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
[b[:punct:]]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/a[b[:punct:]]/utf,ucp,bincode
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
a
|
||||
[b[:punct:]]
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput5
|
||||
|
|
Loading…
Reference in New Issue