Fix incorrect computation of group length when one branch exceeded 65535.

This commit is contained in:
Philip.Hazel 2019-08-03 08:30:40 +00:00
parent 81ad92820a
commit 59c7c5d100
4 changed files with 57 additions and 11 deletions

View File

@ -124,6 +124,11 @@ available directly).
26. Make pcre2test -C show if libreadline or libedit is supported. 26. Make pcre2test -C show if libreadline or libedit is supported.
28. If the length of one branch of a group exceeded 65535 (the maximum value
that is remembered as a minimum length), the whole group's length was
incorrectly recorded as 65535, leading to incorrect "no match" when start-up
optimizations were in force.
Version 10.33 16-April-2019 Version 10.33 16-April-2019
--------------------------- ---------------------------

View File

@ -105,6 +105,7 @@ find_minlength(const pcre2_real_code *re, PCRE2_SPTR code,
int *backref_cache) int *backref_cache)
{ {
int length = -1; int length = -1;
int branchlength = 0;
int prev_cap_recno = -1; int prev_cap_recno = -1;
int prev_cap_d = 0; int prev_cap_d = 0;
int prev_recurse_recno = -1; int prev_recurse_recno = -1;
@ -112,9 +113,9 @@ int prev_recurse_d = 0;
uint32_t once_fudge = 0; uint32_t once_fudge = 0;
BOOL had_recurse = FALSE; BOOL had_recurse = FALSE;
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0; BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
recurse_check this_recurse; PCRE2_SPTR nextbranch = code + GET(code, 1);
int branchlength = 0;
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
recurse_check this_recurse;
/* If this is a "could be empty" group, its minimum length is 0. */ /* If this is a "could be empty" group, its minimum length is 0. */
@ -130,16 +131,20 @@ if ((*countptr)++ > 1000) return -1;
/* Scan along the opcodes for this branch. If we get to the end of the branch, /* Scan along the opcodes for this branch. If we get to the end of the branch,
check the length against that of the other branches. If the accumulated length check the length against that of the other branches. If the accumulated length
passes 16-bits, stop. */ passes 16-bits, reset to that value and skip the rest of the branch. */
for (;;) for (;;)
{ {
int d, min, recno; int d, min, recno;
PCRE2_UCHAR *cs, *ce; PCRE2_UCHAR op, *cs, *ce;
PCRE2_UCHAR op = *cc;
if (branchlength >= UINT16_MAX) return UINT16_MAX; if (branchlength >= UINT16_MAX)
{
branchlength = UINT16_MAX;
cc = (PCRE2_UCHAR *)nextbranch;
}
op = *cc;
switch (op) switch (op)
{ {
case OP_COND: case OP_COND:
@ -229,6 +234,7 @@ for (;;)
if (length < 0 || (!had_recurse && branchlength < length)) if (length < 0 || (!had_recurse && branchlength < length))
length = branchlength; length = branchlength;
if (op != OP_ALT) return length; if (op != OP_ALT) return length;
nextbranch = cc + GET(cc, 1);
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
branchlength = 0; branchlength = 0;
had_recurse = FALSE; had_recurse = FALSE;
@ -241,7 +247,7 @@ for (;;)
case OP_ASSERTBACK: case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA: case OP_ASSERT_NA:
case OP_ASSERTBACK_NA: case OP_ASSERTBACK_NA:
do cc += GET(cc, 1); while (*cc == OP_ALT); do cc += GET(cc, 1); while (*cc == OP_ALT);
/* Fall through */ /* Fall through */
@ -1091,7 +1097,7 @@ do
case OP_ONCE: case OP_ONCE:
case OP_SCRIPT_RUN: case OP_SCRIPT_RUN:
case OP_ASSERT: case OP_ASSERT:
case OP_ASSERT_NA: case OP_ASSERT_NA:
rc = set_start_bits(re, tcode, utf); rc = set_start_bits(re, tcode, utf);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
if (rc == SSB_DONE) try_next = FALSE; else if (rc == SSB_DONE) try_next = FALSE; else
@ -1134,7 +1140,7 @@ do
case OP_ASSERT_NOT: case OP_ASSERT_NOT:
case OP_ASSERTBACK: case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT: case OP_ASSERTBACK_NOT:
case OP_ASSERTBACK_NA: case OP_ASSERTBACK_NA:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT); do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE; tcode += 1 + LINK_SIZE;
break; break;
@ -1584,9 +1590,9 @@ return yield;
/* This function is handed a compiled expression that it must study to produce /* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. information that will speed up the matching.
Argument: Argument:
re points to the compiled expression re points to the compiled expression
Returns: 0 normally; non-zero should never normally occur Returns: 0 normally; non-zero should never normally occur
1 unknown opcode in set_start_bits 1 unknown opcode in set_start_bits
2 missing capturing bracket 2 missing capturing bracket

8
testdata/testinput2 vendored
View File

@ -5740,4 +5740,12 @@ a)"xI
/c*+/ /c*+/
ab\=ph,offset=2 ab\=ph,offset=2
/\A\s*(a|(?:[^`]{28500}){4})/I
a
/\A\s*((?:[^`]{28500}){4})/I
/\A\s*((?:[^`]{28500}){4}|a)/I
a
# End of testinput2 # End of testinput2

27
testdata/testoutput2 vendored
View File

@ -17267,6 +17267,33 @@ Partial match:
ab\=ph,offset=2 ab\=ph,offset=2
Partial match: Partial match:
/\A\s*(a|(?:[^`]{28500}){4})/I
Capture group count = 1
Max lookbehind = 1
Compile options: <none>
Overall options: anchored
Subject length lower bound = 1
a
0: a
1: a
/\A\s*((?:[^`]{28500}){4})/I
Capture group count = 1
Max lookbehind = 1
Compile options: <none>
Overall options: anchored
Subject length lower bound = 65535
/\A\s*((?:[^`]{28500}){4}|a)/I
Capture group count = 1
Max lookbehind = 1
Compile options: <none>
Overall options: anchored
Subject length lower bound = 1
a
0: a
1: a
# End of testinput2 # End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data