Compile \p{Any} to the same opcode as "." in DOTALL mode (OP_ALLANY), so that a pattern starting with \p{Any}* benefits from auto-anchoring.
This commit is contained in:
parent
f2e1cea288
commit
255f5e741b
|
@ -128,6 +128,9 @@ ClusterFuzz 12950, fixed before release.
|
||||||
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
||||||
construct.
|
construct.
|
||||||
|
|
||||||
|
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
|
||||||
|
from auto-anchoring if \p{Any}* starts a pattern.
|
||||||
|
|
||||||
|
|
||||||
Version 10.32 10-September-2018
|
Version 10.32 10-September-2018
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
|
@ -1459,7 +1459,7 @@ Returns: zero => a data character
|
||||||
|
|
||||||
int
|
int
|
||||||
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
||||||
int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
|
int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
|
||||||
compile_block *cb)
|
compile_block *cb)
|
||||||
{
|
{
|
||||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||||
|
@ -1551,7 +1551,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||||||
|
|
||||||
/* Escapes that need further processing, including those that are unknown, have
|
/* Escapes that need further processing, including those that are unknown, have
|
||||||
a zero entry in the lookup table. When called from pcre2_substitute(), only \c,
|
a zero entry in the lookup table. When called from pcre2_substitute(), only \c,
|
||||||
\o, and \x are recognized (\u and \U can never appear as they are used for case
|
\o, and \x are recognized (\u and \U can never appear as they are used for case
|
||||||
forcing). */
|
forcing). */
|
||||||
|
|
||||||
else
|
else
|
||||||
|
@ -1559,7 +1559,7 @@ else
|
||||||
int s;
|
int s;
|
||||||
PCRE2_SPTR oldptr;
|
PCRE2_SPTR oldptr;
|
||||||
BOOL overflow;
|
BOOL overflow;
|
||||||
BOOL alt_bsux =
|
BOOL alt_bsux =
|
||||||
((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0;
|
((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0;
|
||||||
|
|
||||||
/* Filter calls from pcre2_substitute(). */
|
/* Filter calls from pcre2_substitute(). */
|
||||||
|
@ -1571,8 +1571,8 @@ else
|
||||||
*errorcodeptr = ERR3;
|
*errorcodeptr = ERR3;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
alt_bsux = FALSE; /* Do not modify \x handling */
|
alt_bsux = FALSE; /* Do not modify \x handling */
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
|
@ -1595,37 +1595,37 @@ else
|
||||||
if (!alt_bsux) *errorcodeptr = ERR37; else
|
if (!alt_bsux) *errorcodeptr = ERR37; else
|
||||||
{
|
{
|
||||||
uint32_t xc;
|
uint32_t xc;
|
||||||
|
|
||||||
if (ptr >= ptrend) break;
|
if (ptr >= ptrend) break;
|
||||||
if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
|
if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
|
||||||
(extra_options & PCRE2_EXTRA_ALT_BSUX) != 0)
|
(extra_options & PCRE2_EXTRA_ALT_BSUX) != 0)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR hptr = ptr + 1;
|
PCRE2_SPTR hptr = ptr + 1;
|
||||||
cc = 0;
|
cc = 0;
|
||||||
|
|
||||||
while (hptr < ptrend && (xc = XDIGIT(*hptr)) != 0xff)
|
while (hptr < ptrend && (xc = XDIGIT(*hptr)) != 0xff)
|
||||||
{
|
{
|
||||||
if ((cc & 0xf0000000) != 0) /* Test for 32-bit overflow */
|
if ((cc & 0xf0000000) != 0) /* Test for 32-bit overflow */
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR77;
|
*errorcodeptr = ERR77;
|
||||||
ptr = hptr; /* Show where */
|
ptr = hptr; /* Show where */
|
||||||
break; /* *hptr != } will cause another break below */
|
break; /* *hptr != } will cause another break below */
|
||||||
}
|
}
|
||||||
cc = (cc << 4) | xc;
|
cc = (cc << 4) | xc;
|
||||||
hptr++;
|
hptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hptr == ptr + 1 || /* No hex digits */
|
if (hptr == ptr + 1 || /* No hex digits */
|
||||||
hptr >= ptrend || /* Hit end of input */
|
hptr >= ptrend || /* Hit end of input */
|
||||||
*hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
|
*hptr != CHAR_RIGHT_CURLY_BRACKET) /* No } terminator */
|
||||||
break; /* Hex escape not recognized */
|
break; /* Hex escape not recognized */
|
||||||
|
|
||||||
c = cc; /* Accept the code point */
|
c = cc; /* Accept the code point */
|
||||||
ptr = hptr + 1;
|
ptr = hptr + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
else /* Must be exactly 4 hex digits */
|
else /* Must be exactly 4 hex digits */
|
||||||
{
|
{
|
||||||
if (ptrend - ptr < 4) break; /* Less than 4 chars */
|
if (ptrend - ptr < 4) break; /* Less than 4 chars */
|
||||||
if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
|
if ((cc = XDIGIT(ptr[0])) == 0xff) break; /* Not a hex digit */
|
||||||
if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
|
if ((xc = XDIGIT(ptr[1])) == 0xff) break; /* Not a hex digit */
|
||||||
|
@ -1635,8 +1635,8 @@ else
|
||||||
if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
|
if ((xc = XDIGIT(ptr[3])) == 0xff) break; /* Not a hex digit */
|
||||||
c = (cc << 4) | xc;
|
c = (cc << 4) | xc;
|
||||||
ptr += 4;
|
ptr += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
||||||
|
@ -3424,7 +3424,7 @@ while (ptr < ptrend)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
tempptr = ptr;
|
tempptr = ptr;
|
||||||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||||||
cb->cx->extra_options, TRUE, cb);
|
cb->cx->extra_options, TRUE, cb);
|
||||||
|
|
||||||
if (errorcode != 0)
|
if (errorcode != 0)
|
||||||
|
@ -7631,9 +7631,20 @@ for (;; pptr++)
|
||||||
{
|
{
|
||||||
uint32_t ptype = *(++pptr) >> 16;
|
uint32_t ptype = *(++pptr) >> 16;
|
||||||
uint32_t pdata = *pptr & 0xffff;
|
uint32_t pdata = *pptr & 0xffff;
|
||||||
*code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
|
|
||||||
*code++ = ptype;
|
/* The special case of \p{Any} is compiled to OP_ALLANY so as to benefit
|
||||||
*code++ = pdata;
|
from the auto-anchoring code. */
|
||||||
|
|
||||||
|
if (meta_arg == ESC_p && ptype == PT_ANY)
|
||||||
|
{
|
||||||
|
*code++ = OP_ALLANY;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
*code++ = (meta_arg == ESC_p)? OP_PROP : OP_NOTPROP;
|
||||||
|
*code++ = ptype;
|
||||||
|
*code++ = pdata;
|
||||||
|
}
|
||||||
break; /* End META_ESCAPE */
|
break; /* End META_ESCAPE */
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -2170,4 +2170,8 @@
|
||||||
|
|
||||||
/(?'X²ABC'...)/utf
|
/(?'X²ABC'...)/utf
|
||||||
|
|
||||||
|
# -------
|
||||||
|
|
||||||
|
/\p{Any}*xyz/I
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -3294,27 +3294,27 @@ No match
|
||||||
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
|
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Any
|
AllAny
|
||||||
prop Any +
|
AllAny+
|
||||||
notprop Any
|
notprop Any
|
||||||
prop Any +
|
AllAny+
|
||||||
prop L&
|
prop L&
|
||||||
prop Any +
|
AllAny+
|
||||||
prop L
|
prop L
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Lu
|
prop Lu
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Han
|
prop Han
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Xan
|
prop Xan
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Xsp
|
prop Xsp
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Xps
|
prop Xps
|
||||||
prop Xwd +
|
prop Xwd +
|
||||||
prop Any
|
AllAny
|
||||||
prop Any +
|
AllAny+
|
||||||
prop Xuc
|
prop Xuc
|
||||||
Ket
|
Ket
|
||||||
End
|
End
|
||||||
|
@ -3324,7 +3324,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop L& +
|
prop L& +
|
||||||
prop Any
|
AllAny
|
||||||
prop L& +
|
prop L& +
|
||||||
prop L&
|
prop L&
|
||||||
notprop L& ++
|
notprop L& ++
|
||||||
|
@ -3355,7 +3355,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop N +
|
prop N +
|
||||||
prop Any
|
AllAny
|
||||||
prop N +
|
prop N +
|
||||||
prop L&
|
prop L&
|
||||||
prop N ++
|
prop N ++
|
||||||
|
@ -3386,7 +3386,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Lu +
|
prop Lu +
|
||||||
prop Any
|
AllAny
|
||||||
prop Lu +
|
prop Lu +
|
||||||
prop L&
|
prop L&
|
||||||
prop Lu +
|
prop Lu +
|
||||||
|
@ -3448,7 +3448,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Xan +
|
prop Xan +
|
||||||
prop Any
|
AllAny
|
||||||
prop Xan +
|
prop Xan +
|
||||||
prop L&
|
prop L&
|
||||||
notprop Xan ++
|
notprop Xan ++
|
||||||
|
@ -3479,7 +3479,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Xsp +
|
prop Xsp +
|
||||||
prop Any
|
AllAny
|
||||||
prop Xsp ++
|
prop Xsp ++
|
||||||
prop L&
|
prop L&
|
||||||
prop Xsp ++
|
prop Xsp ++
|
||||||
|
@ -3508,7 +3508,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Xwd +
|
prop Xwd +
|
||||||
prop Any
|
AllAny
|
||||||
prop Xwd +
|
prop Xwd +
|
||||||
prop L&
|
prop L&
|
||||||
prop Xwd +
|
prop Xwd +
|
||||||
|
@ -3537,7 +3537,7 @@ No match
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
prop Xuc +
|
prop Xuc +
|
||||||
prop Any
|
AllAny
|
||||||
prop Xuc +
|
prop Xuc +
|
||||||
prop L&
|
prop L&
|
||||||
prop Xuc +
|
prop Xuc +
|
||||||
|
@ -4924,4 +4924,13 @@ Failed: error 162 at offset 3: subpattern name expected
|
||||||
/(?'X²ABC'...)/utf
|
/(?'X²ABC'...)/utf
|
||||||
Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
|
Failed: error 142 at offset 4: syntax error in subpattern name (missing terminator?)
|
||||||
|
|
||||||
|
# -------
|
||||||
|
|
||||||
|
/\p{Any}*xyz/I
|
||||||
|
Capture group count = 0
|
||||||
|
Compile options: <none>
|
||||||
|
Overall options: anchored
|
||||||
|
Last code unit = 'z'
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
Loading…
Reference in New Issue