More refactoring: keep track of empty branches during compiling, replacing a
post-compile scan.
This commit is contained in:
parent
1f87b60f01
commit
d15aab4d17
|
@ -237,6 +237,13 @@ be the result.
|
||||||
the internal recursive calls that are used for lookrounds and recursions within
|
the internal recursive calls that are used for lookrounds and recursions within
|
||||||
the pattern.
|
the pattern.
|
||||||
|
|
||||||
|
37. More refactoring has got rid of the internal could_be_empty_branch()
|
||||||
|
function (around 400 lines of code, including comments) by keeping track of
|
||||||
|
could-be-emptiness as the pattern is compiled instead of scanning compiled
|
||||||
|
groups. (This would have been much harder before the refactoring of #3 above.)
|
||||||
|
This lifts a restriction on the number of branches in a group (more than about
|
||||||
|
1100 would give "regular expression is too complicated").
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -4651,7 +4651,7 @@ B)x/alt_verbnames,mark
|
||||||
|
|
||||||
/abcdef/hex,max_pattern_length=3
|
/abcdef/hex,max_pattern_length=3
|
||||||
|
|
||||||
# These two patterns used to take a long time to compile
|
# These patterns used to take a long time to compile
|
||||||
|
|
||||||
"(.*)
|
"(.*)
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
|
@ -4664,9 +4664,6 @@ B)x/alt_verbnames,mark
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
a)"xI
|
a)"xI
|
||||||
|
|
||||||
# When (?| is used and groups of the same number may be different,
|
|
||||||
# we have to rely on a count to catch overly complicated patterns.
|
|
||||||
|
|
||||||
"(?|()|())(.*)
|
"(?|()|())(.*)
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
|
@ -4941,4 +4938,10 @@ a)"xI
|
||||||
|
|
||||||
"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
|
"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
|
||||||
|
|
||||||
|
# This checks that new code for handling groups that may match an empty string
|
||||||
|
# works on a very large number of alternatives. This pattern used to provoke a
|
||||||
|
# complaint that it was too complicated.
|
||||||
|
|
||||||
|
/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -10741,7 +10741,8 @@ Matched, but too many substrings
|
||||||
|
|
||||||
/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I
|
/(?(DEFINE)(a(?2)|b)(b(?1)|a))(?:(?1)|(?2))/I
|
||||||
Capturing subpattern count = 2
|
Capturing subpattern count = 2
|
||||||
Subject length lower bound = 1
|
May match empty string
|
||||||
|
Subject length lower bound = 0
|
||||||
|
|
||||||
/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I
|
/(a(?2)|b)(b(?1)|a)(?:(?1)|(?2))/I
|
||||||
Capturing subpattern count = 2
|
Capturing subpattern count = 2
|
||||||
|
@ -14759,7 +14760,7 @@ Failed: error 188 at offset 0: pattern string is longer than the limit set by th
|
||||||
|
|
||||||
/abcdef/hex,max_pattern_length=3
|
/abcdef/hex,max_pattern_length=3
|
||||||
|
|
||||||
# These two patterns used to take a long time to compile
|
# These patterns used to take a long time to compile
|
||||||
|
|
||||||
"(.*)
|
"(.*)
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
|
@ -14782,14 +14783,14 @@ May match empty string
|
||||||
Options: extended
|
Options: extended
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
|
|
||||||
# When (?| is used and groups of the same number may be different,
|
|
||||||
# we have to rely on a count to catch overly complicated patterns.
|
|
||||||
|
|
||||||
"(?|()|())(.*)
|
"(?|()|())(.*)
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))"xI
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))"xI
|
||||||
Failed: error 186 at offset 148: regular expression is too complicated
|
Capturing subpattern count = 13
|
||||||
|
May match empty string
|
||||||
|
Options: extended
|
||||||
|
Subject length lower bound = 0
|
||||||
|
|
||||||
"(?|()|())(?<=a()
|
"(?|()|())(?<=a()
|
||||||
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))((?-2)(?-2))
|
||||||
|
@ -15417,6 +15418,12 @@ Max back reference = 22
|
||||||
Contains explicit CR or LF match
|
Contains explicit CR or LF match
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
|
|
||||||
|
# This checks that new code for handling groups that may match an empty string
|
||||||
|
# works on a very large number of alternatives. This pattern used to provoke a
|
||||||
|
# complaint that it was too complicated.
|
||||||
|
|
||||||
|
/(?:\[A|B|C|D|E|F|G|H|I|J|]{200}Z)/expand
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue