Compile \p{Any} the same as . in DOTALL mode, to benefit from auto-anchoring.

This commit is contained in:
Philip.Hazel 2019-02-13 17:30:24 +00:00
parent f2e1cea288
commit 255f5e741b
4 changed files with 70 additions and 43 deletions

View File

@ -128,6 +128,9 @@ ClusterFuzz 12950, fixed before release.
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
construct.
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
from auto-anchoring if \p{Any}* starts a pattern.
Version 10.32 10-September-2018
-------------------------------

View File

@ -7631,9 +7631,20 @@ for (;; pptr++)
{
/* Advance to the next parsed-pattern item, which carries both property
fields: the property type in its upper 16 bits and the property data in its
lower 16 bits. */
uint32_t ptype = *(++pptr) >> 16;
uint32_t pdata = *pptr & 0xffff;
/* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit
from the auto-anchoring code. Only the positive form is converted: the test
requires meta_arg == ESC_p, so any other escape with PT_ANY (the negated
\P{Any}) still goes through the general path below and yields OP_NOTPROP. */
if (meta_arg == ESC_p && ptype == PT_ANY)
{
/* A single code unit is emitted; no type/data operands follow. */
*code++ = OP_ALLANY;
}
else
{
/* General case: opcode followed by two operands, type then data. */
*code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
*code++ = ptype;
*code++ = pdata;
}
break; /* End META_ESCAPE */
}
#endif

4
testdata/testinput5 vendored
View File

@ -2170,4 +2170,8 @@
/(?'X²ABC'...)/utf
# -------
/\p{Any}*xyz/I
# End of testinput5

47
testdata/testoutput5 vendored
View File

@ -3294,27 +3294,27 @@ No match
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
------------------------------------------------------------------
Bra
prop Any +
prop Any
prop Any +
AllAny+
AllAny
AllAny+
notprop Any
prop Any +
AllAny+
prop L&
prop Any +
AllAny+
prop L
prop Any +
AllAny+
prop Lu
prop Any +
AllAny+
prop Han
prop Any +
AllAny+
prop Xan
prop Any +
AllAny+
prop Xsp
prop Any +
AllAny+
prop Xps
prop Xwd +
prop Any
prop Any +
AllAny
AllAny+
prop Xuc
Ket
End
@ -3324,7 +3324,7 @@ No match
------------------------------------------------------------------
Bra
prop L& +
prop Any
AllAny
prop L& +
prop L&
notprop L& ++
@ -3355,7 +3355,7 @@ No match
------------------------------------------------------------------
Bra
prop N +
prop Any
AllAny
prop N +
prop L&
prop N ++
@ -3386,7 +3386,7 @@ No match
------------------------------------------------------------------
Bra
prop Lu +
prop Any
AllAny
prop Lu +
prop L&
prop Lu +
@ -3448,7 +3448,7 @@ No match
------------------------------------------------------------------
Bra
prop Xan +
prop Any
AllAny
prop Xan +
prop L&
notprop Xan ++
@ -3479,7 +3479,7 @@ No match
------------------------------------------------------------------
Bra
prop Xsp +
prop Any
AllAny
prop Xsp ++
prop L&
prop Xsp ++
@ -3508,7 +3508,7 @@ No match
------------------------------------------------------------------
Bra
prop Xwd +
prop Any
AllAny
prop Xwd +
prop L&
prop Xwd +
@ -3537,7 +3537,7 @@ No match
------------------------------------------------------------------
Bra
prop Xuc +
prop Any
AllAny
prop Xuc +
prop L&
prop Xuc +
@ -4924,4 +4924,13 @@ Failed: error 162 at offset 3: subpattern name expected
/(?'X²ABC'...)/utf
Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
# -------
/\p{Any}*xyz/I
Capture group count = 0
Compile options: <none>
Overall options: anchored
Last code unit = 'z'
Subject length lower bound = 3
# End of testinput5