diff --git a/ChangeLog b/ChangeLog index 2472fa0..4e3164f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -314,6 +314,10 @@ with JIT (possibly caused by SSE2?). 94. Support offset_limit in JIT. +95. A sequence such as [[:punct:]b], that is, a POSIX character class followed +by a single ASCII character in a class item, was incorrectly compiled in UCP +mode. The POSIX class got lost, but only if the single character followed it. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index c6e84ce..ff7bebd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1352,7 +1352,7 @@ if ((cb->external_flags & PCRE2_DUPCAPUSED) == 0 && /* A large and/or complex regex can take too long to process. We have to assume it can match an empty string. This can happen more often when (?| groups are -present in the pattern and the caching is disabled. Setting the cap at 1100 +present in the pattern and the caching is disabled. Setting the cap at 1100 allows the test for more than 1023 capturing patterns to work. */ if ((*countptr)++ > 1100) return CBE_TOOCOMPLICATED; @@ -4729,16 +4729,20 @@ for (;; ptr++) CLASS_SINGLE_CHARACTER: if (class_one_char < 2) class_one_char++; - /* If class_one_char is 1, we have the first single character in the - class, and there have been no prior ranges, or XCLASS items generated by - escapes. If this is the final character in the class, we can optimize by - turning the item into a 1-character OP_CHAR[I] if it's positive, or - OP_NOT[I] if it's negative. In the positive case, it can cause firstcu - to be set. Otherwise, there can be no first char if this item is first, - whatever repeat count may follow. In the case of reqcu, save the - previous value for reinstating. */ + /* If class_one_char is 1 and xclass_has_prop is false, we have the first + single character in the class, and there have been no prior ranges, or + XCLASS items generated by escapes. 
If this is the final character in the + class, we can optimize by turning the item into a 1-character OP_CHAR[I] + if it's positive, or OP_NOT[I] if it's negative. In the positive case, it + can cause firstcu to be set. Otherwise, there can be no first char if + this item is first, whatever repeat count may follow. In the case of + reqcu, save the previous value for reinstating. */ - if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) + if (!inescq && +#ifdef SUPPORT_UNICODE + !xclass_has_prop && +#endif + class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) { ptr++; zeroreqcu = reqcu; @@ -7287,7 +7291,7 @@ for (;; ptr++) else { - if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ + if (escape == ESC_C) cb->external_flags |= PCRE2_HASBKC; /* Record */ if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) && cb->max_lookbehind == 0) cb->max_lookbehind = 1; diff --git a/testdata/testinput2 b/testdata/testinput2 index a6cfd85..dfd1aa8 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4685,4 +4685,8 @@ a)"xI "(*ANYCRLF)(?m)^(.*[^0-9\r\n].*|)$"g,replace=NaN 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 +/a[[:punct:]b]/bincode + +/a[b[:punct:]]/bincode + # End of testinput2 diff --git a/testdata/testinput5 b/testdata/testinput5 index a288459..2432d19 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1691,4 +1691,10 @@ /abc\Cdef/info,utf +/a[[:punct:]b]/ucp,bincode + +/a[[:punct:]b]/utf,ucp,bincode + +/a[b[:punct:]]/utf,ucp,bincode + # End of testinput5 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 9408756..e5350d5 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14888,4 +14888,22 @@ Subject length lower bound = 0 15\r\nfoo\r\n20\r\nbar\r\nbaz\r\n\r\n20 4: 15\x0d\x0aNaN\x0d\x0a20\x0d\x0aNaN\x0d\x0aNaN\x0d\x0aNaN\x0d\x0a20 +/a[[:punct:]b]/bincode +------------------------------------------------------------------ + Bra + a + [!-/:-@[-`b{-~] + Ket + End 
+------------------------------------------------------------------ + +/a[b[:punct:]]/bincode +------------------------------------------------------------------ + Bra + a + [!-/:-@[-`b{-~] + Ket + End +------------------------------------------------------------------ + # End of testinput2 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index ef93100..7eb9df3 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4070,4 +4070,31 @@ First code unit = 'a' Last code unit = 'f' Subject length lower bound = 0 +/a[[:punct:]b]/ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[[:punct:]b]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + +/a[b[:punct:]]/utf,ucp,bincode +------------------------------------------------------------------ + Bra + a + [b[:punct:]] + Ket + End +------------------------------------------------------------------ + # End of testinput5