Fix *MARK length check in UTF mode (it was checking characters, not code
units).
This commit is contained in:
parent
2821076981
commit
56084cc922
12
ChangeLog
12
ChangeLog
|
@ -40,6 +40,15 @@ parenthesis item, not the length of the whole group. A length of zero is now
|
||||||
given only for a callout at the end of the pattern. Automatic callouts are no
|
given only for a callout at the end of the pattern. Automatic callouts are no
|
||||||
longer inserted before and after explicit callouts in the pattern.
|
longer inserted before and after explicit callouts in the pattern.
|
||||||
|
|
||||||
|
Some bugs in the refactored code were subsequently fixed before release:
|
||||||
|
|
||||||
|
(a) An overall recursion such as (?0) inside a lookbehind assertion was not
|
||||||
|
being diagnosed as an error.
|
||||||
|
|
||||||
|
(b) In UTF mode, the length of a *MARK (or other verb) name was being checked
|
||||||
|
in characters instead of code units, which could cause bad code to be
|
||||||
|
compiled, leading to unpredictable behaviour.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
reference is by name, there is only one group of that name. The referenced
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
@ -96,9 +105,6 @@ only when PCRE2_NO_START_OPTIMIZE was *not* set:
|
||||||
16. The "offset" modifier in pcre2test was not being ignored (as documented)
|
16. The "offset" modifier in pcre2test was not being ignored (as documented)
|
||||||
when the POSIX API was in use.
|
when the POSIX API was in use.
|
||||||
|
|
||||||
17. An overall recursion such as (?0) inside a lookbehind assertion was not
|
|
||||||
being diagnosed as an error.
|
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2LIMITS 3 "29 September 2016" "PCRE2 10.23"
|
.TH PCRE2LIMITS 3 "26 October 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "SIZE AND OTHER LIMITATIONS"
|
.SH "SIZE AND OTHER LIMITATIONS"
|
||||||
|
@ -55,7 +55,8 @@ The maximum length of name for a named subpattern is 32 code units, and the
|
||||||
maximum number of named subpatterns is 10000.
|
maximum number of named subpatterns is 10000.
|
||||||
.P
|
.P
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||||
|
32-bit libraries.
|
||||||
.P
|
.P
|
||||||
The maximum length of a string argument to a callout is the largest number a
|
The maximum length of a string argument to a callout is the largest number a
|
||||||
32-bit unsigned integer can hold.
|
32-bit unsigned integer can hold.
|
||||||
|
@ -75,6 +76,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 29 September 2016
|
Last updated: 26 October 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -2161,6 +2161,7 @@ BOOL negate_class;
|
||||||
BOOL okquantifier = FALSE;
|
BOOL okquantifier = FALSE;
|
||||||
PCRE2_SPTR name;
|
PCRE2_SPTR name;
|
||||||
PCRE2_SPTR ptrend = cb->end_pattern;
|
PCRE2_SPTR ptrend = cb->end_pattern;
|
||||||
|
PCRE2_SPTR verbnamestart = NULL; /* Value avoids compiler warning */
|
||||||
named_group *ng;
|
named_group *ng;
|
||||||
nest_save *top_nest = NULL;
|
nest_save *top_nest = NULL;
|
||||||
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
|
nest_save *end_nests = (nest_save *)(cb->start_workspace + cb->workspace_size);
|
||||||
|
@ -2248,8 +2249,10 @@ while (ptr < ptrend)
|
||||||
|
|
||||||
case CHAR_RIGHT_PARENTHESIS:
|
case CHAR_RIGHT_PARENTHESIS:
|
||||||
inverbname = FALSE;
|
inverbname = FALSE;
|
||||||
|
/* This is the length in characters */
|
||||||
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
|
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
|
||||||
if (verbnamelength > MAX_MARK)
|
/* But the limit on the length is in code units */
|
||||||
|
if (ptr - verbnamestart - 1 > MAX_MARK)
|
||||||
{
|
{
|
||||||
ptr--;
|
ptr--;
|
||||||
errorcode = ERR76;
|
errorcode = ERR76;
|
||||||
|
@ -3149,6 +3152,7 @@ while (ptr < ptrend)
|
||||||
*parsed_pattern++ = verbs[i].meta +
|
*parsed_pattern++ = verbs[i].meta +
|
||||||
((verbs[i].meta != META_MARK)? 0x00010000u:0);
|
((verbs[i].meta != META_MARK)? 0x00010000u:0);
|
||||||
verblengthptr = parsed_pattern++;
|
verblengthptr = parsed_pattern++;
|
||||||
|
verbnamestart = ptr;
|
||||||
inverbname = TRUE;
|
inverbname = TRUE;
|
||||||
}
|
}
|
||||||
else /* No verb "name" argument */
|
else /* No verb "name" argument */
|
||||||
|
|
|
@ -454,4 +454,6 @@
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
123
|
123
|
||||||
|
|
||||||
|
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -258,4 +258,6 @@
|
||||||
|
|
||||||
/(*MARK:a\x{100}b)z/alt_verbnames
|
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||||
|
|
||||||
|
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||||||
|
|
||||||
# End of testinput9
|
# End of testinput9
|
||||||
|
|
|
@ -1564,4 +1564,7 @@ Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
|
||||||
123
|
123
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
|
||||||
|
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -364,4 +364,7 @@ Failed: error 177 at offset 7: character code point value in \u.... sequence is
|
||||||
/(*MARK:a\x{100}b)z/alt_verbnames
|
/(*MARK:a\x{100}b)z/alt_verbnames
|
||||||
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
Failed: error 134 at offset 14: character code point value in \x{} or \o{} is too large
|
||||||
|
|
||||||
|
/(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/
|
||||||
|
Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
|
|
||||||
# End of testinput9
|
# End of testinput9
|
||||||
|
|
Loading…
Reference in New Issue