From 093412143d9309e885803d1375523a95686fe711 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 9 Oct 2015 16:06:53 +0000 Subject: [PATCH] Fix compiler bug for classes such as [\W\p{Any}]. --- ChangeLog | 4 ++++ src/pcre2_compile.c | 30 ++++++++++++++++-------------- testdata/testinput5 | 9 +++++++++ testdata/testoutput5 | 25 +++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/ChangeLog b/ChangeLog index bf43a47..8cd9fb3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -194,6 +194,10 @@ and the matching functions with NULL contexts can be tested. 55. Implemented PCRE2_SUBSTITUTE_EXTENDED. +56. In a character class such as [\W\p{Any}] where both a negative-type escape +("not a word character") and a property escape were present, the property +escape was being ignored. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index b581762..c2945e8 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -1645,11 +1645,11 @@ int i; /* If backslash is at the end of the pattern, it's an error. */ -if (ptr >= ptrend) +if (ptr >= ptrend) { *errorcodeptr = ERR1; return 0; - } + } GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */ ptr--; /* Set pointer back to the last code unit */ @@ -1671,8 +1671,8 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0) } } -/* Escapes that need further processing, including those that are unknown. -When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u +/* Escapes that need further processing, including those that are unknown. +When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u when BSUX is set). */ else @@ -1680,7 +1680,7 @@ else PCRE2_SPTR oldptr; BOOL braced, negated, overflow; unsigned int s; - + /* Filter calls from pcre2_substitute(). 
*/ if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x && @@ -1688,7 +1688,7 @@ else { *errorcodeptr = ERR3; return 0; - } + } switch (c) { @@ -4645,19 +4645,21 @@ for (;; ptr++) zeroreqcu = reqcu; zeroreqcuflags = reqcuflags; - /* If there are characters with values > 255, we have to compile an - extended class, with its own opcode, unless there was a negated special - such as \S in the class, and PCRE2_UCP is not set, because in that case all + /* If there are characters with values > 255, or Unicode property settings + (\p or \P), we have to compile an extended class, with its own opcode, + unless there were no property settings and there was a negated special such + as \S in the class, and PCRE2_UCP is not set, because in that case all characters > 255 are in the class, so any that were explicitly given as - well can be ignored. If (when there are explicit characters > 255 that must - be listed) there are no characters < 256, we can omit the bitmap in the - actual compiled code. */ + well can be ignored. If (when there are explicit characters > 255 or + property settings that must be listed) there are no characters < 256, we + can omit the bitmap in the actual compiled code. 
*/ #ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_UNICODE - if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0)) + if (xclass && (xclass_has_prop || !should_flip_negation || + (options & PCRE2_UCP) != 0)) #elif PCRE2_CODE_UNIT_WIDTH != 8 - if (xclass && !should_flip_negation) + if (xclass && (xclass_has_prop || !should_flip_negation)) #endif { *class_uchardata++ = XCL_END; /* Marks the end of extra data */ diff --git a/testdata/testinput5 b/testdata/testinput5 index d8e51d7..79f551e 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -1690,4 +1690,13 @@ /((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> ab12cde +/[\W\p{Any}]/B + abc + 123 + +/[\W\pL]/B + abc +\= Expect no match + 123 + # End of testinput5 diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 2b4e535..c2e7976 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -4043,4 +4043,29 @@ MK: a\x{12345}b\x{09}(d)c ab12cde 7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter> +/[\W\p{Any}]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{Any}] + Ket + End +------------------------------------------------------------------ + abc + 0: a + 123 + 0: 1 + +/[\W\pL]/B +------------------------------------------------------------------ + Bra + [\x00-/:-@[-^`{-\xff\p{L}] + Ket + End +------------------------------------------------------------------ + abc + 0: a +\= Expect no match + 123 +No match + # End of testinput5