Extend auto-anchoring to ignore "never-obeyed" groups at the start.

This commit is contained in:
Philip.Hazel 2017-04-07 08:46:29 +00:00
parent 88abc14e42
commit 09f87cbfce
4 changed files with 65 additions and 0 deletions

View File

@ -116,6 +116,11 @@ a message, and abandon the run (this would have detected #13 above).
20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement
the --output=text (-O) option and the inbuilt callout echo.
21. Extend auto-anchoring etc. to ignore groups with a zero quantifier and
single-branch conditions with a false condition (e.g. DEFINE) at the start of a
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
anchored.
Version 10.23 14-February-2017
------------------------------

View File

@ -4164,6 +4164,18 @@ for (;;)
case OP_CALLOUT_STR:
code += GET(code, 1 + 2*LINK_SIZE);
break;
case OP_SKIPZERO:
code += 2 + GET(code, 2) + LINK_SIZE;
break;
case OP_COND:
case OP_SCOND:
if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */
code[GET(code, 1)] != OP_KET) /* More than one branch */
return code;
code += GET(code, 1) + 1 + LINK_SIZE;
break;
default:
return code;

16
testdata/testinput2 vendored
View File

@ -5040,4 +5040,20 @@ a)"xI
#subject -no_jit
# Check auto-anchoring when there is a group that is never obeyed at
# the start of a branch.
/(?(DEFINE)(a))^bc/I
/(a){0}.*bc/sI
# This should be anchored, as the condition is always false and there is
# no alternative branch.
/(?(VERSION>=999)yes)^bc/I
# This should not be anchored.
/(?(VERSION>=999)yes|no)^bc/I
# End of testinput2

32
testdata/testoutput2 vendored
View File

@ -15576,6 +15576,38 @@ No match
#subject -no_jit
# Check auto-anchoring when there is a group that is never obeyed at
# the start of a branch.
/(?(DEFINE)(a))^bc/I
Capturing subpattern count = 1
Compile options: <none>
Overall options: anchored
Subject length lower bound = 2
/(a){0}.*bc/sI
Capturing subpattern count = 1
Compile options: dotall
Overall options: anchored dotall
Last code unit = 'c'
Subject length lower bound = 2
# This should be anchored, as the condition is always false and there is
# no alternative branch.
/(?(VERSION>=999)yes)^bc/I
Capturing subpattern count = 0
Compile options: <none>
Overall options: anchored
Subject length lower bound = 2
# This should not be anchored.
/(?(VERSION>=999)yes|no)^bc/I
Capturing subpattern count = 0
Last code unit = 'c'
Subject length lower bound = 4
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data