Fix handling of global matching in pcre2test when a lookbehind assertion contains \K.
This commit is contained in:
Philip.Hazel 2015-04-06 12:16:36 +00:00
parent cda8384a95
commit 7105d249f6
6 changed files with 125 additions and 19 deletions

View File

@ -70,6 +70,9 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern 17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
(e.g. /(?<=\Ka)/) could make pcre2grep loop. (e.g. /(?<=\Ka)/) could make pcre2grep loop.
18. There was a similar problem to 17 in pcre2test for global matches, though
the code there did catch the loop.
Version 10.10 06-March-2015 Version 10.10 06-March-2015
--------------------------- ---------------------------

View File

@ -3557,14 +3557,14 @@ unit widths are that the pointers to the subject, the most recent MARK, and a
callout argument string point to strings of the appropriate width. Casts can be callout argument string point to strings of the appropriate width. Casts can be
used to deal with this. used to deal with this.
Argument: Argument:
cb pointer to enumerate block cb pointer to enumerate block
callout_data user data callout_data user data
Returns: 0 Returns: 0
*/ */
static int callout_callback(pcre2_callout_enumerate_block_8 *cb, static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
void *callout_data) void *callout_data)
{ {
uint32_t i; uint32_t i;
@ -3587,13 +3587,13 @@ if (cb->callout_string != NULL)
} }
fprintf(outfile, "%c ", delimiter); fprintf(outfile, "%c ", delimiter);
} }
else fprintf(outfile, "%d ", cb->callout_number); else fprintf(outfile, "%d ", cb->callout_number);
fprintf(outfile, "%.*s\n", fprintf(outfile, "%.*s\n",
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length), (int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
pbuffer8 + cb->pattern_position); pbuffer8 + cb->pattern_position);
return 0; return 0;
} }
@ -3879,10 +3879,10 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
int len; int len;
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode); fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
if (errorcode < 0) if (errorcode < 0)
{ {
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer); PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile); PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
} }
fprintf(outfile, "\n"); fprintf(outfile, "\n");
return PR_SKIP; return PR_SKIP;
} }
@ -5684,20 +5684,20 @@ else for (gmatched = 0;; gmatched++)
ovector = FLD(match_data, ovector); ovector = FLD(match_data, ovector);
/* After the first time round a global loop, save the current ovector[0,1] so /* After the first time round a global loop, for a normal global (/g)
that we can check that they do change each time. Otherwise a matching bug iteration, save the current ovector[0,1] so that we can check that they do
that returns the same string causes an infinite loop. It has happened! */ change each time. Otherwise a matching bug that returns the same string
causes an infinite loop. It has happened! */
if (gmatched > 0) if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
{ {
ovecsave[0] = ovector[0]; ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1]; ovecsave[1] = ovector[1];
} }
/* Set the variables on the first iteration, just to stop a compiler warning /* For altglobal (or first time round the loop), set an "unset" value. */
when ovecsave[] is referenced below. */
else ovecsave[0] = ovecsave[1] = 0; else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
/* Fill the ovector with junk to detect elements that do not get set /* Fill the ovector with junk to detect elements that do not get set
when they should be. */ when they should be. */
@ -6169,13 +6169,48 @@ else for (gmatched = 0;; gmatched++)
if (end_offset == ulen) break; /* End of subject */ if (end_offset == ulen) break; /* End of subject */
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
} }
else g_notempty = 0;
/* For /g, update the start offset, leaving the rest alone */ /* However, even after matching a non-empty string, there is still one
tricky case. If a pattern contains \K within a lookbehind assertion at the
start, the end of the matched string can be at the offset where the match
started. In the case of a normal /g iteration without special action, this
leads to a loop that keeps on returning the same substring. The loop would
be caught above, but we really want to move on to the next match. */
if ((dat_datctl.control & CTL_GLOBAL) != 0) dat_datctl.offset = end_offset; else
{
g_notempty = 0; /* Set for a "normal" repeat */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
{
PCRE2_SIZE startchar;
PCRE2_GET_STARTCHAR(startchar, match_data);
if (end_offset <= startchar)
{
if (startchar >= ulen) break; /* End of subject */
end_offset = startchar + 1;
if (utf && test_mode != PCRE32_MODE)
{
if (test_mode == PCRE8_MODE)
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
}
else /* 16-bit mode */
{
for (; end_offset < ulen; end_offset++)
if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
}
}
}
}
}
/* For /G, update the pointer and length */ /* For /g (global), update the start offset, leaving the rest alone. */
if ((dat_datctl.control & CTL_GLOBAL) != 0)
dat_datctl.offset = end_offset;
/* For altglobal, just update the pointer and length. */
else else
{ {

8
testdata/testinput2 vendored
View File

@ -4255,4 +4255,12 @@ a random value. /Ix
";(?<=()((?3))((?2)))" ";(?<=()((?3))((?2)))"
# Perl loops on this (PCRE2 used to!)
/(?<=\Ka)/g,aftertext
aaaaa
/(?<=\Ka)/altglobal,aftertext
aaaaa
# End of testinput2 # End of testinput2

6
testdata/testinput5 vendored
View File

@ -1641,4 +1641,10 @@
/[A-`]/iB,utf /[A-`]/iB,utf
abcdefghijklmno abcdefghijklmno
/(?<=\K\x{17f})/g,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
/(?<=\K\x{17f})/altglobal,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
# End of testinput5 # End of testinput5

28
testdata/testoutput2 vendored
View File

@ -14260,4 +14260,32 @@ Failed: error 115 at offset 15: reference to non-existent subpattern
";(?<=()((?3))((?2)))" ";(?<=()((?3))((?2)))"
Failed: error 125 at offset 20: lookbehind assertion is not fixed length Failed: error 125 at offset 20: lookbehind assertion is not fixed length
# Perl loops on this (PCRE2 used to!)
/(?<=\Ka)/g,aftertext
aaaaa
0: a
0+ aaaa
0: a
0+ aaa
0: a
0+ aa
0: a
0+ a
0: a
0+
/(?<=\Ka)/altglobal,aftertext
aaaaa
0: a
0+ aaaa
0: a
0+ aaa
0: a
0+ aa
0: a
0+ a
0: a
0+
# End of testinput2 # End of testinput2

26
testdata/testoutput5 vendored
View File

@ -4019,4 +4019,30 @@ Failed: error 140 at offset 11: recursion could loop indefinitely
abcdefghijklmno abcdefghijklmno
0: a 0: a
/(?<=\K\x{17f})/g,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}
0: \x{17f}
0+
/(?<=\K\x{17f})/altglobal,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}\x{17f}
0: \x{17f}
0+ \x{17f}
0: \x{17f}
0+
# End of testinput5 # End of testinput5