From bfad956b34fbee2f6e04584627f5d78390d6305d Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Mon, 3 Sep 2018 15:20:40 +0000 Subject: [PATCH] Treat empty-string-matching repeated conditionals the same as ordinary ones when checking for an anchored pattern. --- ChangeLog | 6 +++ src/pcre2_compile.c | 2 +- testdata/testinput2 | 31 +++++++++++++++ testdata/testoutput2 | 92 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 130 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index db82939..780f60c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -179,6 +179,12 @@ subpattern was treated as anchored, when it should not have been, since the assumed empty second branch cannot be anchored. Demonstrated by test patterns such as /(?(1)^())b/ or /(?(?=^))b/. +40. A repeated conditional subpattern that could match an empty string was +always assumed to be unanchored. Now it is checked just like any other +repeated conditional subpattern, and can be found to be anchored if the minimum +quantifier is one or more. I can't see much use for a repeated anchored +pattern, but the behaviour is now consistent. + Version 10.31 12-February-2018 ------------------------------ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 3df55e9..6bb1de3 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -7866,7 +7866,7 @@ do { /* Condition. If there is no second branch, it can't be anchored. 
*/ - else if (op == OP_COND) + else if (op == OP_COND || op == OP_SCOND) { if (scode[GET(scode,1)] != OP_ALT) return FALSE; if (!is_anchored(scode, bracket_map, cb, atomcount, inassert)) diff --git a/testdata/testinput2 b/testdata/testinput2 index c0f4292..fc94b35 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5474,4 +5474,35 @@ a)"xI /(?(1)^())b/I +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^()|^)+b/I,aftertext + bbc +\= Expect no match + abc + +/(?(1)^()|^)*b/I,aftertext + bbc + abc + xbc + +/(?(1)^())+b/I,aftertext + abc + +/(?(1)^a()|^a)+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^|^(a))+b/I,aftertext + abc +\= Expect no match + bbc + +/(?(1)^a()|^a)*b/I,aftertext + abc + bbc + xbc + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 6f0dd12..ecf0d80 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16671,6 +16671,98 @@ Max back reference = 1 Last code unit = 'b' Subject length lower bound = 1 +/(?(1)^())+b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + +/(?(1)^()|^)+b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc +\= Expect no match + abc +No match + +/(?(1)^()|^)*b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +First code unit = 'b' +Subject length lower bound = 1 + bbc + 0: b + 0+ bc + abc + 0: b + 0+ c + xbc + 0: b + 0+ c + +/(?(1)^())+b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: b + 0+ c + +/(?(1)^a()|^a)+b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +First code unit = 'a' +Last code unit = 'b' +Subject length lower bound = 2 + abc + 0: ab + 0+ c +\= Expect no match + bbc 
+No match + +/(?(1)^|^(a))+b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Compile options: +Overall options: anchored +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + 1: a +\= Expect no match + bbc +No match + +/(?(1)^a()|^a)*b/I,aftertext +Capturing subpattern count = 1 +Max back reference = 1 +Last code unit = 'b' +Subject length lower bound = 1 + abc + 0: ab + 0+ c + bbc + 0: b + 0+ bc + xbc + 0: b + 0+ c + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data