Fail hyphen after POSIX character class.
This commit is contained in:
parent
8933d999d8
commit
fdf7946ee0
|
@ -38,6 +38,10 @@ some minor bugs and Perl incompatibilities were fixed, including:
|
||||||
zero is now marked "no access". This catches bugs that would otherwise
|
zero is now marked "no access". This catches bugs that would otherwise
|
||||||
show up only with non-zero-terminated patterns.
|
show up only with non-zero-terminated patterns.
|
||||||
|
|
||||||
|
(g) A hyphen appearing immediately after a POSIX character class (for example
|
||||||
|
/[[:ascii:]-z]/) now generates an error. Perl does accept this as a
|
||||||
|
literal, but gives a warning, so it seems best to fail it in PCRE.
|
||||||
|
|
||||||
One effect of the refactoring is that some error numbers and messages have
|
One effect of the refactoring is that some error numbers and messages have
|
||||||
changed, and the pattern offset given for compiling errors is not always the
|
changed, and the pattern offset given for compiling errors is not always the
|
||||||
right-most character that has been read. In particular, for a variable-length
|
right-most character that has been read. In particular, for a variable-length
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PATTERN 3 "23 December 2016" "PCRE2 10.23"
|
.TH PCRE2PATTERN 3 "27 December 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||||
|
@ -1352,10 +1352,10 @@ indicating a range, typically as the first or last character in the class,
|
||||||
or immediately after a range. For example, [b-d-z] matches letters in the range
|
or immediately after a range. For example, [b-d-z] matches letters in the range
|
||||||
b to d, a hyphen character, or z.
|
b to d, a hyphen character, or z.
|
||||||
.P
|
.P
|
||||||
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
Perl treats a hyphen as a literal if it appears before or after a POSIX class
|
||||||
below) or a character type escape such as \ed, but gives a warning in its
|
(see below) or a character type escape such as \ed, but gives a warning in
|
||||||
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
its warning mode, as this is most likely a user error. As PCRE2 has no facility
|
||||||
warning, an error is given in these cases.
|
for warning, an error is given in these cases.
|
||||||
.P
|
.P
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
range. A pattern such as [W-]46] is interpreted as a class of two characters
|
range. A pattern such as [W-]46] is interpreted as a class of two characters
|
||||||
|
@ -3482,6 +3482,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 23 December 2016
|
Last updated: 27 December 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -2993,6 +2993,17 @@ while (ptr < ptrend)
|
||||||
}
|
}
|
||||||
ptr = tempptr + 2;
|
ptr = tempptr + 2;
|
||||||
|
|
||||||
|
/* Perl treats a hyphen after a POSIX class as a literal, not the
|
||||||
|
start of a range. However, it gives a warning in its warning mode. PCRE
|
||||||
|
does not have a warning mode, so we give an error, because this is
|
||||||
|
likely an error on the user's part. */
|
||||||
|
|
||||||
|
if (ptr < ptrend && *ptr == CHAR_MINUS)
|
||||||
|
{
|
||||||
|
errorcode = ERR50;
|
||||||
|
goto FAILED;
|
||||||
|
}
|
||||||
|
|
||||||
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
/* When PCRE2_UCP is set, some of the POSIX classes are converted to
|
||||||
use Unicode properties \p or \P or, in one case, \h or \H. The
|
use Unicode properties \p or \P or, in one case, \h or \H. The
|
||||||
substitutes table has two values per class, containing the type and
|
substitutes table has two values per class, containing the type and
|
||||||
|
@ -5003,7 +5014,7 @@ for (;; pptr++)
|
||||||
{
|
{
|
||||||
#ifdef DEBUG_SHOW_PARSED
|
#ifdef DEBUG_SHOW_PARSED
|
||||||
fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x "
|
fprintf(stderr, "** Unrecognized parsed pattern item 0x%.8x "
|
||||||
"in character class", meta);
|
"in character class\n", meta);
|
||||||
#endif
|
#endif
|
||||||
*errorcodeptr = ERR89; /* Internal error - unrecognized. */
|
*errorcodeptr = ERR89; /* Internal error - unrecognized. */
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -4950,4 +4950,6 @@ a)"xI
|
||||||
/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/
|
/.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X/
|
||||||
.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X
|
.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X
|
||||||
|
|
||||||
|
/[:[:alnum:]-[[a:lnum:]+/
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -15431,6 +15431,9 @@ Subject length lower bound = 0
|
||||||
Failed: error 128 at offset 63: assertion expected after (?( or (?(?C)
|
Failed: error 128 at offset 63: assertion expected after (?( or (?(?C)
|
||||||
.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X
|
.+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X
|
||||||
|
|
||||||
|
/[:[:alnum:]-[[a:lnum:]+/
|
||||||
|
Failed: error 150 at offset 11: invalid range in character class
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue