Fix infelicity in not finding a first character inside a non-assertive group within a positive assertion.
This commit is contained in:
Philip.Hazel 2017-12-12 16:23:01 +00:00
parent 59d85d7b55
commit 3458a2e2cd
3 changed files with 15 additions and 7 deletions

View File

@ -70,6 +70,14 @@ pattern, apart from assertions, an incorrect first matching character could be
recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set
as the first character of a match. as the first character of a match.
18. Characters in a leading positive assertion are considered for recording a
first character of a match when the rest of the pattern does not provide one.
However, a character in a non-assertive group within a leading assertion, such
as in the pattern /(?=(a))\1?b/, caused this process to fail. This was an
infelicity rather than an outright bug, because it did not affect the result of
a match, just its speed. (In fact, in this case, the starting 'a' was
subsequently picked up in the study.)
Version 10.30 14-August-2017 Version 10.30 14-August-2017
---------------------------- ----------------------------

View File

@ -8106,13 +8106,13 @@ REQ_NONE in the flags.
Arguments: Arguments:
code points to start of compiled pattern code points to start of compiled pattern
flags points to the first code unit flags flags points to the first code unit flags
inassert TRUE if in an assertion inassert non-zero if in an assertion
Returns: the fixed first code unit, or 0 with REQ_NONE in flags Returns: the fixed first code unit, or 0 with REQ_NONE in flags
*/ */
static uint32_t static uint32_t
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert) find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
{ {
uint32_t c = 0; uint32_t c = 0;
int cflags = REQ_NONE; int cflags = REQ_NONE;
@ -8139,7 +8139,7 @@ do {
case OP_SCBRAPOS: case OP_SCBRAPOS:
case OP_ASSERT: case OP_ASSERT:
case OP_ONCE: case OP_ONCE:
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT); d = find_firstassertedcu(scode, &dflags, inassert + ((op==OP_ASSERT)?1:0));
if (dflags < 0) if (dflags < 0)
return 0; return 0;
if (cflags < 0) { c = d; cflags = dflags; } if (cflags < 0) { c = d; cflags = dflags; }
@ -8154,7 +8154,7 @@ do {
case OP_PLUS: case OP_PLUS:
case OP_MINPLUS: case OP_MINPLUS:
case OP_POSPLUS: case OP_POSPLUS:
if (!inassert) return 0; if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = 0; } if (cflags < 0) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0; else if (c != scode[1]) return 0;
break; break;
@ -8167,7 +8167,7 @@ do {
case OP_PLUSI: case OP_PLUSI:
case OP_MINPLUSI: case OP_MINPLUSI:
case OP_POSPLUSI: case OP_POSPLUSI:
if (!inassert) return 0; if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; } if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0; else if (c != scode[1]) return 0;
break; break;
@ -9674,7 +9674,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
actual literals that follow). */ actual literals that follow). */
if (firstcuflags < 0) if (firstcuflags < 0)
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE); firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
/* Save the data for a first code unit. */ /* Save the data for a first code unit. */

View File

@ -16358,7 +16358,7 @@ Subject length lower bound = 1
"(?=(a))\1?b"I "(?=(a))\1?b"I
Capturing subpattern count = 1 Capturing subpattern count = 1
Max back reference = 1 Max back reference = 1
Starting code units: a First code unit = 'a'
Last code unit = 'b' Last code unit = 'b'
Subject length lower bound = 1 Subject length lower bound = 1
ab ab