Compile \p{Any} the same as the dot metacharacter (.) in DOTALL mode, so that it benefits from auto-anchoring.
This commit is contained in:
parent
f2e1cea288
commit
255f5e741b
|
@ -128,6 +128,9 @@ ClusterFuzz 12950, fixed before release.
|
|||
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
||||
construct.
|
||||
|
||||
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
|
||||
from auto-anchoring if \p{Any}* starts a pattern.
|
||||
|
||||
|
||||
Version 10.32 10-September-2018
|
||||
-------------------------------
|
||||
|
|
|
@ -1459,7 +1459,7 @@ Returns: zero => a data character
|
|||
|
||||
int
|
||||
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
||||
int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
|
||||
int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
|
||||
compile_block *cb)
|
||||
{
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
|
@ -1551,7 +1551,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
|||
|
||||
/* Escapes that need further processing, including those that are unknown, have
|
||||
a zero entry in the lookup table. When called from pcre2_substitute(), only \c,
|
||||
\o, and \x are recognized (\u and \U can never appear as they are used for case
|
||||
\o, and \x are recognized (\u and \U can never appear as they are used for case
|
||||
forcing). */
|
||||
|
||||
else
|
||||
|
@ -1559,7 +1559,7 @@ else
|
|||
int s;
|
||||
PCRE2_SPTR oldptr;
|
||||
BOOL overflow;
|
||||
BOOL alt_bsux =
|
||||
BOOL alt_bsux =
|
||||
((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0;
|
||||
|
||||
/* Filter calls from pcre2_substitute(). */
|
||||
|
@ -1571,8 +1571,8 @@ else
|
|||
*errorcodeptr = ERR3;
|
||||
return 0;
|
||||
}
|
||||
alt_bsux = FALSE; /* Do not modify \x handling */
|
||||
}
|
||||
alt_bsux = FALSE; /* Do not modify \x handling */
|
||||
}
|
||||
|
||||
switch (c)
|
||||
{
|
||||
|
@ -1595,37 +1595,37 @@ else
|
|||
if (!alt_bsux) *errorcodeptr = ERR37; else
|
||||
{
|
||||
uint32_t xc;
|
||||
|
||||
|
||||
if (ptr >= ptrend) break;
|
||||
if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
|
||||
if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
|
||||
(extra_options & PCRE2_EXTRA_ALT_BSUX) != 0)
|
||||
{
|
||||
PCRE2_SPTR hptr = ptr + 1;
|
||||
cc = 0;
|
||||
|
||||
|
||||
while (hptr < ptrend && (xc = XDIGIT(*hptr)) != 0xff)
|
||||
{
|
||||
{
|
||||
if ((cc & 0xf0000000) != 0) /* Test for 32-bit overflow */
|
||||
{
|
||||
*errorcodeptr = ERR77;
|
||||
ptr = hptr; /* Show where */
|
||||
break; /* *hptr != } will cause another break below */
|
||||
}
|
||||
break; /* *hptr != } will cause another break below */
|
||||
}
|
||||
cc = (cc << 4) | xc;
|
||||
hptr++;
|
||||
}
|
||||
|
||||
hptr++;
|
||||
}
|
||||
|
||||
if (hptr == ptr + 1 || /* No hex digits */
|
||||
hptr >= ptrend || /* Hit end of input */
|
||||
*hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
|
||||
break; /* Hex escape not recognized */
|
||||
|
||||
|
||||
c = cc; /* Accept the code point */
|
||||
ptr = hptr + 1;
|
||||
ptr = hptr + 1;
|
||||
}
|
||||
|
||||
|
||||
else /* Must be exactly 4 hex digits */
|
||||
{
|
||||
{
|
||||
if (ptrend - ptr < 4) break; /* Less than 4 chars */
|
||||
if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
|
||||
if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
|
||||
|
@ -1635,8 +1635,8 @@ else
|
|||
if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
|
||||
c = (cc << 4) | xc;
|
||||
ptr += 4;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (utf)
|
||||
{
|
||||
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
||||
|
@ -3424,7 +3424,7 @@ while (ptr < ptrend)
|
|||
else
|
||||
{
|
||||
tempptr = ptr;
|
||||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||||
cb->cx->extra_options, TRUE, cb);
|
||||
|
||||
if (errorcode != 0)
|
||||
|
@ -7631,9 +7631,20 @@ for (;; pptr++)
|
|||
{
|
||||
uint32_t ptype = *(++pptr) >> 16;
|
||||
uint32_t pdata = *pptr & 0xffff;
|
||||
*code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
|
||||
*code++ = ptype;
|
||||
*code++ = pdata;
|
||||
|
||||
/* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit
|
||||
from the auto-anchoring code. */
|
||||
|
||||
if (meta_arg == ESC_p && ptype == PT_ANY)
|
||||
{
|
||||
*code++ = OP_ALLANY;
|
||||
}
|
||||
else
|
||||
{
|
||||
*code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
|
||||
*code++ = ptype;
|
||||
*code++ = pdata;
|
||||
}
|
||||
break; /* End META_ESCAPE */
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -2170,4 +2170,8 @@
|
|||
|
||||
/(?'X²ABC'...)/utf
|
||||
|
||||
# -------
|
||||
|
||||
/\p{Any}*xyz/I
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -3294,27 +3294,27 @@ No match
|
|||
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Any +
|
||||
prop Any
|
||||
prop Any +
|
||||
AllAny+
|
||||
AllAny
|
||||
AllAny+
|
||||
notprop Any
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop L&
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop L
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop Lu
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop Han
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop Xan
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop Xsp
|
||||
prop Any +
|
||||
AllAny+
|
||||
prop Xps
|
||||
prop Xwd +
|
||||
prop Any
|
||||
prop Any +
|
||||
AllAny
|
||||
AllAny+
|
||||
prop Xuc
|
||||
Ket
|
||||
End
|
||||
|
@ -3324,7 +3324,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop L& +
|
||||
prop Any
|
||||
AllAny
|
||||
prop L& +
|
||||
prop L&
|
||||
notprop L& ++
|
||||
|
@ -3355,7 +3355,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop N +
|
||||
prop Any
|
||||
AllAny
|
||||
prop N +
|
||||
prop L&
|
||||
prop N ++
|
||||
|
@ -3386,7 +3386,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Lu +
|
||||
prop Any
|
||||
AllAny
|
||||
prop Lu +
|
||||
prop L&
|
||||
prop Lu +
|
||||
|
@ -3448,7 +3448,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Xan +
|
||||
prop Any
|
||||
AllAny
|
||||
prop Xan +
|
||||
prop L&
|
||||
notprop Xan ++
|
||||
|
@ -3479,7 +3479,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Xsp +
|
||||
prop Any
|
||||
AllAny
|
||||
prop Xsp ++
|
||||
prop L&
|
||||
prop Xsp ++
|
||||
|
@ -3508,7 +3508,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Xwd +
|
||||
prop Any
|
||||
AllAny
|
||||
prop Xwd +
|
||||
prop L&
|
||||
prop Xwd +
|
||||
|
@ -3537,7 +3537,7 @@ No match
|
|||
------------------------------------------------------------------
|
||||
Bra
|
||||
prop Xuc +
|
||||
prop Any
|
||||
AllAny
|
||||
prop Xuc +
|
||||
prop L&
|
||||
prop Xuc +
|
||||
|
@ -4924,4 +4924,13 @@ Failed: error 162 at offset 3: subpattern name expected
|
|||
/(?'X²ABC'...)/utf
|
||||
Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
|
||||
|
||||
# -------
|
||||
|
||||
/\p{Any}*xyz/I
|
||||
Capture group count = 0
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Last code unit = 'z'
|
||||
Subject length lower bound = 3
|
||||
|
||||
# End of testinput5
|
||||
|
|
Loading…
Reference in New Issue