Fix an infelicity whereby a first character inside a non-assertive group
within a positive assertion was not found.
This commit is contained in:
parent
59d85d7b55
commit
3458a2e2cd
|
@ -70,6 +70,14 @@ pattern, apart from assertions, an incorrect first matching character could be
|
||||||
recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set
|
recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set
|
||||||
as the first character of a match.
|
as the first character of a match.
|
||||||
|
|
||||||
|
18. Characters in a leading positive assertion are considered for recording a
|
||||||
|
first character of a match when the rest of the pattern does not provide one.
|
||||||
|
However, a character in a non-assertive group within a leading assertion such
|
||||||
|
as in the pattern /(?=(a))\1?b/ caused this process to fail. This was an
|
||||||
|
infelicity rather than an outright bug, because it did not affect the result of
|
||||||
|
a match, just its speed. (In fact, in this case, the starting 'a' was
|
||||||
|
subsequently picked up in the study.)
|
||||||
|
|
||||||
|
|
||||||
Version 10.30 14-August-2017
|
Version 10.30 14-August-2017
|
||||||
----------------------------
|
----------------------------
|
||||||
|
|
|
@ -8106,13 +8106,13 @@ REQ_NONE in the flags.
|
||||||
Arguments:
|
Arguments:
|
||||||
code points to start of compiled pattern
|
code points to start of compiled pattern
|
||||||
flags points to the first code unit flags
|
flags points to the first code unit flags
|
||||||
inassert TRUE if in an assertion
|
inassert non-zero if in an assertion (count of enclosing assertions)
|
||||||
|
|
||||||
Returns: the fixed first code unit, or 0 with REQ_NONE in flags
|
Returns: the fixed first code unit, or 0 with REQ_NONE in flags
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert)
|
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
|
||||||
{
|
{
|
||||||
uint32_t c = 0;
|
uint32_t c = 0;
|
||||||
int cflags = REQ_NONE;
|
int cflags = REQ_NONE;
|
||||||
|
@ -8139,7 +8139,7 @@ do {
|
||||||
case OP_SCBRAPOS:
|
case OP_SCBRAPOS:
|
||||||
case OP_ASSERT:
|
case OP_ASSERT:
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
|
d = find_firstassertedcu(scode, &dflags, inassert + ((op==OP_ASSERT)?1:0));
|
||||||
if (dflags < 0)
|
if (dflags < 0)
|
||||||
return 0;
|
return 0;
|
||||||
if (cflags < 0) { c = d; cflags = dflags; }
|
if (cflags < 0) { c = d; cflags = dflags; }
|
||||||
|
@ -8154,7 +8154,7 @@ do {
|
||||||
case OP_PLUS:
|
case OP_PLUS:
|
||||||
case OP_MINPLUS:
|
case OP_MINPLUS:
|
||||||
case OP_POSPLUS:
|
case OP_POSPLUS:
|
||||||
if (!inassert) return 0;
|
if (inassert == 0) return 0;
|
||||||
if (cflags < 0) { c = scode[1]; cflags = 0; }
|
if (cflags < 0) { c = scode[1]; cflags = 0; }
|
||||||
else if (c != scode[1]) return 0;
|
else if (c != scode[1]) return 0;
|
||||||
break;
|
break;
|
||||||
|
@ -8167,7 +8167,7 @@ do {
|
||||||
case OP_PLUSI:
|
case OP_PLUSI:
|
||||||
case OP_MINPLUSI:
|
case OP_MINPLUSI:
|
||||||
case OP_POSPLUSI:
|
case OP_POSPLUSI:
|
||||||
if (!inassert) return 0;
|
if (inassert == 0) return 0;
|
||||||
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||||
else if (c != scode[1]) return 0;
|
else if (c != scode[1]) return 0;
|
||||||
break;
|
break;
|
||||||
|
@ -9674,7 +9674,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||||
actual literals that follow). */
|
actual literals that follow). */
|
||||||
|
|
||||||
if (firstcuflags < 0)
|
if (firstcuflags < 0)
|
||||||
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
|
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
|
||||||
|
|
||||||
/* Save the data for a first code unit. */
|
/* Save the data for a first code unit. */
|
||||||
|
|
||||||
|
|
|
@ -16358,7 +16358,7 @@ Subject length lower bound = 1
|
||||||
"(?=(a))\1?b"I
|
"(?=(a))\1?b"I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
Max back reference = 1
|
Max back reference = 1
|
||||||
Starting code units: a
|
First code unit = 'a'
|
||||||
Last code unit = 'b'
|
Last code unit = 'b'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
ab
|
ab
|
||||||
|
|
Loading…
Reference in New Issue