Fix incorrect internal error for very complicated back reference handling.

This commit is contained in:
Philip.Hazel 2016-12-20 16:47:41 +00:00
parent f8dcb985ba
commit a984029d80
4 changed files with 26 additions and 1 deletions

View File

@ -223,6 +223,12 @@ followed by a caseful back reference, could lose the caselessness of the first
repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX
but didn't).
35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum
matching length and just records zero. Typically this happens when there are
too many nested or recursive back references. If the limit was reached in
certain recursive cases, the failure was not passed back to the caller, and an
internal error could be the result.
Version 10.22 29-July-2016
--------------------------

View File

@ -485,6 +485,7 @@ for (;;)
this_recurse.prev = recurses; this_recurse.prev = recurses;
this_recurse.group = cs; this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr); dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
if (dd < 0) return dd;
if (dd < d) d = dd; if (dd < d) d = dd;
} }
} }
@ -525,6 +526,7 @@ for (;;)
this_recurse.prev = recurses; this_recurse.prev = recurses;
this_recurse.group = cs; this_recurse.group = cs;
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr); d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr);
if (d < 0) return d;
} }
} }
} }
@ -1551,7 +1553,8 @@ the minimum length is already known. */
if ((re->flags & PCRE2_MATCH_EMPTY) == 0) if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
{ {
switch(min = find_minlength(re, code, code, utf, NULL, &count)) min = find_minlength(re, code, code, utf, NULL, &count);
switch(min)
{ {
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */ case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */ break; /* Leave minlength unchanged (will be zero) */

6
testdata/testinput2 vendored
View File

@ -4935,4 +4935,10 @@ a)"xI
".+\QX\E+"B,auto_callout,no_auto_possess ".+\QX\E+"B,auto_callout,no_auto_possess
# This one is here because Perl gives an 'unmatched )' error which goes away
# if one of the \) sequences is removed - which is weird. PCRE finds it too
# complicated to find a minimum matching length.
"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
# End of testinput2 # End of testinput2

10
testdata/testoutput2 vendored
View File

@ -15407,6 +15407,16 @@ Failed: error -33: bad offset value
End End
------------------------------------------------------------------ ------------------------------------------------------------------
# This one is here because Perl gives an 'unmatched )' error which goes away
# if one of the \) sequences is removed - which is weird. PCRE finds it too
# complicated to find a minimum matching length.
"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I
Capturing subpattern count = 108
Max back reference = 22
Contains explicit CR or LF match
Subject length lower bound = 0
# End of testinput2 # End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data