From a984029d804200b4f0dd1d243dde882d1673c6e1 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 20 Dec 2016 16:47:41 +0000 Subject: [PATCH] Fix incorrect internal error for very complicated back reference handling. --- ChangeLog | 6 ++++++ src/pcre2_study.c | 5 ++++- testdata/testinput2 | 6 ++++++ testdata/testoutput2 | 10 ++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 33ff691..1306c5f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -223,6 +223,12 @@ followed by a caseful back reference, could lose the caselessness of the first repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX but didn't). +35. When a pattern is too complicated, PCRE2 gives up trying to find a minimum +matching length and just records zero. Typically this happens when there are +too many nested or recursive back references. If the limit was reached in +certain recursive cases it failed to be triggered and an internal error could +be the result. + Version 10.22 29-July-2016 -------------------------- diff --git a/src/pcre2_study.c b/src/pcre2_study.c index 3c70355..4c08bc5 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -485,6 +485,7 @@ for (;;) this_recurse.prev = recurses; this_recurse.group = cs; dd = find_minlength(re, cs, startcode, utf, &this_recurse, countptr); + if (dd < 0) return dd; if (dd < d) d = dd; } } @@ -525,6 +526,7 @@ for (;;) this_recurse.prev = recurses; this_recurse.group = cs; d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr); + if (d < 0) return d; } } } @@ -1551,7 +1553,8 @@ the minimum length is already known. */ if ((re->flags & PCRE2_MATCH_EMPTY) == 0) { - switch(min = find_minlength(re, code, code, utf, NULL, &count)) + min = find_minlength(re, code, code, utf, NULL, &count); + switch(min) { case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */ break; /* Leave minlength unchanged (will be zero) */ diff --git a/testdata/testinput2 b/testdata/testinput2 index aa1013c..aafb4ea 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4935,4 +4935,10 @@ a)"xI ".+\QX\E+"B,auto_callout,no_auto_possess +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. + +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 7f177ce..c934be4 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15407,6 +15407,16 @@ Failed: error -33: bad offset value End ------------------------------------------------------------------ +# This one is here because Perl gives an 'unmatched )' error which goes away +# if one of the \) sequences is removed - which is weird. PCRE finds it too +# complicated to find a minimum matching length. + +"()X|((((((((()))))))((((())))))\2())((((((\2\2)))\2)(\22((((\2\2)2))\2)))(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z+:)Z|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z((Z*(\2(Z\':))\0)i|||||||||||||||loZ\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0nte!rnal errpr\2\\21r(2\ZZZ)+:)Z!|91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZ |91Z(ZZ ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \)\0(2\ZZZ)+:)Z^)))int \)\0(2\ZZZ)+:)Z^|91ZiZZnter(ZZernZal ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)2))\2Z)))int \))\ZZ(\r2Z( or#(\Z2(Z\Z(\2\2)2))\2Z)Z(\22Z((\Z2(Z\Z(\2\2)))\2))))((((((\2\2))))))"I +Capturing subpattern count = 108 +Max back reference = 22 +Contains explicit CR or LF match +Subject length lower bound = 0 + # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data