From c4b8531a8f2649271fe0f880ca87a643b3c416cd Mon Sep 17 00:00:00 2001
From: "Philip.Hazel"
Date: Tue, 17 Nov 2015 17:59:35 +0000
Subject: [PATCH] Fix [:punct:] bug in UCP mode (matching chars in the range 128-255)

---
 ChangeLog            | 3 +++
 src/pcre2_xclass.c   | 2 +-
 testdata/testinput4  | 3 +++
 testdata/testoutput4 | 4 ++++
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 4e3164f..31e8aa4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -318,6 +318,9 @@ with JIT (possibly caused by SSE2?).
 by a single ASCII character in a class item, was incorrectly compiled in UCP
 mode. The POSIX class got lost, but only if the single character followed it.
 
+96. [:punct:] in UCP mode was matching some characters in the range 128-255
+that should not have been matched.
+
 
 Version 10.20 30-June-2015
 --------------------------
diff --git a/src/pcre2_xclass.c b/src/pcre2_xclass.c
index 2ea89c4..6a3554b 100644
--- a/src/pcre2_xclass.c
+++ b/src/pcre2_xclass.c
@@ -247,7 +247,7 @@ while ((t = *data++) != XCL_END)
 
       case PT_PXPUNCT:
       if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
-            (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
+            (c < 128 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
         return !negated;
       break;
 
diff --git a/testdata/testinput4 b/testdata/testinput4
index 22aead5..dfaa1c0 100644
--- a/testdata/testinput4
+++ b/testdata/testinput4
@@ -2233,4 +2233,7 @@
 /[^\p{Any}]*+x/utf
     x
 
+/[[:punct:]]/utf,ucp
+    \x{b4}
+
 # End of testinput4
diff --git a/testdata/testoutput4 b/testdata/testoutput4
index 743623a..0814646 100644
--- a/testdata/testoutput4
+++ b/testdata/testoutput4
@@ -3620,4 +3620,8 @@ No match
     x
  0: x
 
+/[[:punct:]]/utf,ucp
+    \x{b4}
+No match
+
 # End of testoutput4