Fix *MARK length check in UTF mode (it was checking characters, not code
units).
This commit is contained in:
parent
2821076981
commit
56084cc922
12
ChangeLog
12
ChangeLog
|
@ -40,6 +40,15 @@ parenthesis item, not the length of the whole group. A length of zero is now
|
|||
given only for a callout at the end of the pattern. Automatic callouts are no
|
||||
longer inserted before and after explicit callouts in the pattern.
|
||||
|
||||
Some bugs in the refactored code were subsequently fixed before release:
|
||||
|
||||
(a) An overall recursion such as (?0) inside a lookbehind assertion was not
|
||||
being diagnosed as an error.
|
||||
|
||||
(b) In UTF mode, the length of a *MARK (or other verb) name was being checked
|
||||
in characters instead of code units, which could cause bad code to be
|
||||
compiled, resulting in unpredictable behaviour.
|
||||
|
||||
4. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
reference is by name, there is only one group of that name. The referenced
|
||||
|
@ -96,9 +105,6 @@ only when PCRE2_NO_START_OPTIMIZE was *not* set:
|
|||
16. The "offset" modifier in pcre2test was not being ignored (as documented)
|
||||
when the POSIX API was in use.
|
||||
|
||||
17. An overall recursion such as (?0) inside a lookbehind assertion was not
|
||||
being diagnosed as an error.
|
||||
|
||||
|
||||
Version 10.22 29-July-2016
|
||||
--------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2LIMITS 3 "29 September 2016" "PCRE2 10.23"
|
||||
.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "SIZE AND OTHER LIMITATIONS"
|
||||
|
@ -55,7 +55,8 @@ The maximum length of name for a named subpattern is 32 code units, and the
|
|||
maximum number of named subpatterns is 10000.
|
||||
.P
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||
32-bit libraries.
|
||||
.P
|
||||
The maximum length of a string argument to a callout is the largest number a
|
||||
32-bit unsigned integer can hold.
|
||||
|
@ -75,6 +76,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 29 September 2016
|
||||
Last updated: 26 October 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -2161,6 +2161,7 @@ BOOL negate_class;
|
|||
BOOL okquantifier = FALSE;
|
||||
PCRE2_SPTR name;
|
||||
PCRE2_SPTR ptrend = cb->end_pattern;
|
||||
PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
|
||||
named_group *ng;
|
||||
nest_save *top_nest = NULL;
|
||||
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
|
||||
|
@ -2248,8 +2249,10 @@ while (ptr < ptrend)
|
|||
|
||||
case CHAR_RIGHT_PARENTHESIS:
|
||||
inverbname = FALSE;
|
||||
/* This is the length in characters */
|
||||
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
|
||||
if (verbnamelength > MAX_MARK)
|
||||
/* But the limit on the length is in code units */
|
||||
if (ptr - verbnamestart - 1 > MAX_MARK)
|
||||
{
|
||||
ptr--;
|
||||
errorcode = ERR76;
|
||||
|
@ -3149,6 +3152,7 @@ while (ptr < ptrend)
|
|||
*parsed_pattern++ = verbs[i].meta +
|
||||
((verbs[i].meta != META_MARK)? 0x00010000u:0);
|
||||
verblengthptr = parsed_pattern++;
|
||||
verbnamestart = ptr;
|
||||
inverbname = TRUE;
|
||||
}
|
||||
else /* No verb "name" argument */
|
||||
|
|
|
@ -454,4 +454,6 @@
|
|||
\= Expect no match
|
||||
123
|
||||
|
||||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -258,4 +258,6 @@
|
|||
|
||||
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||
|
||||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||||
|
||||
# End of testinput9
|
||||
|
|
|
@ -1564,4 +1564,7 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
|
|||
123
|
||||
No match
|
||||
|
||||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
|
||||
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -364,4 +364,7 @@ Failed: error 177 at offset 7: character code point value in \u.... sequence is
|
|||
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||
|
||||
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||||
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||
|
||||
# End of testinput9
|
||||
|
|
Loading…
Reference in New Issue