Fix handling of global matching in pcre2test when a lookbehind assertion contains \K.
This commit is contained in:
Philip.Hazel 2015-04-06 12:16:36 +00:00
parent cda8384a95
commit 7105d249f6
6 changed files with 125 additions and 19 deletions

View File

@ -70,6 +70,9 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
18. A problem similar to 17 existed in pcre2test for global matches, though
the code there did catch the loop.
Version 10.10 06-March-2015
---------------------------

View File

@ -3557,14 +3557,14 @@ unit widths are that the pointers to the subject, the most recent MARK, and a
callout argument string point to strings of the appropriate width. Casts can be
used to deal with this.
Argument:
Argument:
cb pointer to enumerate block
callout_data user data
Returns: 0
Returns: 0
*/
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
void *callout_data)
{
uint32_t i;
@ -3587,13 +3587,13 @@ if (cb->callout_string != NULL)
}
fprintf(outfile, "%c ", delimiter);
}
else fprintf(outfile, "%d ", cb->callout_number);
else fprintf(outfile, "%d ", cb->callout_number);
fprintf(outfile, "%.*s\n",
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
pbuffer8 + cb->pattern_position);
return 0;
return 0;
}
@ -3879,10 +3879,10 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
int len;
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
if (errorcode < 0)
{
{
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
}
}
fprintf(outfile, "\n");
return PR_SKIP;
}
@ -5684,20 +5684,20 @@ else for (gmatched = 0;; gmatched++)
ovector = FLD(match_data, ovector);
/* After the first time round a global loop, save the current ovector[0,1] so
that we can check that they do change each time. Otherwise a matching bug
that returns the same string causes an infinite loop. It has happened! */
/* After the first time round a global loop, for a normal global (/g)
iteration, save the current ovector[0,1] so that we can check that they do
change each time. Otherwise a matching bug that returns the same string
causes an infinite loop. It has happened! */
if (gmatched > 0)
if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
{
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
}
/* Set the variables on the first iteration, just to stop a compiler warning
when ovecsave[] is referenced below. */
/* For altglobal (or first time round the loop), set an "unset" value. */
else ovecsave[0] = ovecsave[1] = 0;
else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
@ -6169,13 +6169,48 @@ else for (gmatched = 0;; gmatched++)
if (end_offset == ulen) break; /* End of subject */
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
}
else g_notempty = 0;
/* For /g, update the start offset, leaving the rest alone */
/* However, even after matching a non-empty string, there is still one
tricky case. If a pattern contains \K within a lookbehind assertion at the
start, the end of the matched string can be at the offset where the match
started. In the case of a normal /g iteration without special action, this
leads to a loop that keeps on returning the same substring. The loop would
be caught above, but we really want to move on to the next match. */
if ((dat_datctl.control & CTL_GLOBAL) != 0) dat_datctl.offset = end_offset;
else
{
g_notempty = 0; /* Set for a "normal" repeat */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
{
PCRE2_SIZE startchar;
PCRE2_GET_STARTCHAR(startchar, match_data);
if (end_offset <= startchar)
{
if (startchar >= ulen) break; /* End of subject */
end_offset = startchar + 1;
if (utf && test_mode != PCRE32_MODE)
{
if (test_mode == PCRE8_MODE)
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
}
else /* 16-bit mode */
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
}
}
}
}
}
/* For /G, update the pointer and length */
/* For /g (global), update the start offset, leaving the rest alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
dat_datctl.offset = end_offset;
/* For altglobal, just update the pointer and length. */
else
{

8
testdata/testinput2 vendored
View File

@ -4255,4 +4255,12 @@ a random value. /Ix
";(?<=()((?3))((?2)))"
# Perl loops on this (PCRE2 used to!)
/(?<=\Ka)/g,aftertext
aaaaa
/(?<=\Ka)/altglobal,aftertext
aaaaa
# End of testinput2

6
testdata/testinput5 vendored
View File

@ -1641,4 +1641,10 @@
/[A-`]/iB,utf
abcdefghijklmno
/(?<=\K\x{17f})/g,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
/(?<=\K\x{17f})/altglobal,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
# End of testinput5

28
testdata/testoutput2 vendored
View File

@ -14260,4 +14260,32 @@ Failed: error 115 at offset 15: reference to non-existent subpattern
";(?<=()((?3))((?2)))"
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
# Perl loops on this (PCRE2 used to!)
/(?<=\Ka)/g,aftertext
aaaaa
0: a
0+ aaaa
0: a
0+ aaa
0: a
0+ aa
0: a
0+ a
0: a
0+
/(?<=\Ka)/altglobal,aftertext
aaaaa
0: a
0+ aaaa
0: a
0+ aaa
0: a
0+ aa
0: a
0+ a
0: a
0+
# End of testinput2

26
testdata/testoutput5 vendored
View File

@ -4019,4 +4019,30 @@ Failed: error 140 at offset 11: recursion could loop indefinitely
abcdefghijklmno
0: a
/(?<=\K\x{17f})/g,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}
0: \x{17f}
0+
/(?<=\K\x{17f})/altglobal,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}
0: \x{17f}
0+
# End of testinput5