diff --git a/ChangeLog b/ChangeLog index d276feb..151141f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -37,6 +37,10 @@ some minor bugs and Perl incompatibilities were fixed, including: (f) When testing zero-terminated patterns under valgrind, the terminating zero is now marked "no access". This catches bugs that would otherwise show up only with non-zero-terminated patterns. + + (g) A hyphen appearing immediately after a POSIX character class (for example + /[[:ascii:]-z]/) now generates an error. Perl does accept this as a + literal, but gives a warning, so it seems best to fail it in PCRE. One effect of the refactoring is that some error numbers and messages have changed, and the pattern offset given for compiling errors is not always the diff --git a/doc/pcre2pattern.3 b/doc/pcre2pattern.3 index 33e5698..ec134fd 100644 --- a/doc/pcre2pattern.3 +++ b/doc/pcre2pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "23 December 2016" "PCRE2 10.23" +.TH PCRE2PATTERN 3 "27 December 2016" "PCRE2 10.23" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -1352,10 +1352,10 @@ indicating a range, typically as the first or last character in the class, or immediately after a range. For example, [b-d-z] matches letters in the range b to d, a hyphen character, or z. .P -Perl treats a hyphen as a literal if it appears before a POSIX class (see -below) or a character type escape such as as \ed, but gives a warning in its -warning mode, as this is most likely a user error. As PCRE2 has no facility for -warning, an error is given in these cases. +Perl treats a hyphen as a literal if it appears before or after a POSIX class +(see below) or a character type escape such as \ed, but gives a warning in +its warning mode, as this is most likely a user error. As PCRE2 has no facility +for warning, an error is given in these cases. .P It is not possible to have the literal character "]" as the end character of a range. 
A pattern such as [W-]46] is interpreted as a class of two characters @@ -3482,6 +3482,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 December 2016 +Last updated: 27 December 2016 Copyright (c) 1997-2016 University of Cambridge. .fi diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index fce226b..046faff 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -2992,6 +2992,17 @@ while (ptr < ptrend) goto FAILED; } ptr = tempptr + 2; + + /* Perl treats a hyphen after a POSIX class as a literal, not the + start of a range. However, it gives a warning in its warning mode. PCRE + does not have a warning mode, so we give an error, because this is + likely an error on the user's part. */ + + if (ptr < ptrend && *ptr == CHAR_MINUS) + { + errorcode = ERR50; + goto FAILED; + } /* When PCRE2_UCP is set, some of the POSIX classes are converted to use Unicode properties \p or \P or, in one case, \h or \H. The @@ -5003,7 +5014,7 @@ for (;; pptr++) { #ifdef DEBUG_SHOW_PARSED fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x " - "in character class", meta); + "in character class\n", meta); #endif *errorcodeptr = ERR89; /* Internal error - unrecognized. 
*/ return 0; diff --git a/testdata/testinput2 b/testdata/testinput2 index 01594c3..f8a0545 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4950,4 +4950,6 @@ a)"xI /.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/ .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X +/[:[:alnum:]-[[a:lnum:]+/ + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index f062b44..9f8ccb0 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15431,6 +15431,9 @@ Subject length lower bound = 0 Failed: error 128 at offset 63: assertion expected after (?( or (?(?C) .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X +/[:[:alnum:]-[[a:lnum:]+/ +Failed: error 150 at offset 11: invalid range in character class + # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data