Fix infelicity in not finding a first character inside a non-assertive group within a positive assertion.
This commit is contained in:
Philip.Hazel 2017-12-12 16:23:01 +00:00
parent 59d85d7b55
commit 3458a2e2cd
3 changed files with 15 additions and 7 deletions

View File

@ -70,6 +70,14 @@ pattern, apart from assertions, an incorrect first matching character could be
recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set
as the first character of a match.
18. Characters in a leading positive assertion are considered for recording a
first character of a match when the rest of the pattern does not provide one.
However, a character in a non-assertive group within a leading assertion, such
as in the pattern /(?=(a))\1?b/, caused this process to fail. This was an
infelicity rather than an outright bug, because it did not affect the result of
a match, just its speed. (In fact, in this case, the starting 'a' was
subsequently picked up in the study.)
Version 10.30 14-August-2017
----------------------------

View File

@ -8106,13 +8106,13 @@ REQ_NONE in the flags.
Arguments:
code points to start of compiled pattern
flags points to the first code unit flags
inassert TRUE if in an assertion
inassert non-zero if in an assertion
Returns: the fixed first code unit, or 0 with REQ_NONE in flags
*/
static uint32_t
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, BOOL inassert)
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
{
uint32_t c = 0;
int cflags = REQ_NONE;
@ -8139,7 +8139,7 @@ do {
case OP_SCBRAPOS:
case OP_ASSERT:
case OP_ONCE:
d = find_firstassertedcu(scode, &dflags, op == OP_ASSERT);
d = find_firstassertedcu(scode, &dflags, inassert + ((op==OP_ASSERT)?1:0));
if (dflags < 0)
return 0;
if (cflags < 0) { c = d; cflags = dflags; }
@ -8154,7 +8154,7 @@ do {
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
if (!inassert) return 0;
if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0;
break;
@ -8167,7 +8167,7 @@ do {
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
if (!inassert) return 0;
if (inassert == 0) return 0;
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
@ -9674,7 +9674,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
actual literals that follow). */
if (firstcuflags < 0)
firstcu = find_firstassertedcu(codestart, &firstcuflags, FALSE);
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
/* Save the data for a first code unit. */

View File

@ -16358,7 +16358,7 @@ Subject length lower bound = 1
"(?=(a))\1?b"I
Capturing subpattern count = 1
Max back reference = 1
Starting code units: a
First code unit = 'a'
Last code unit = 'b'
Subject length lower bound = 1
ab