Fix handling of global matching in pcre2test when a lookbehind assertion
contains \K.
This commit is contained in:
parent
cda8384a95
commit
7105d249f6
|
@ -70,6 +70,9 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
|||
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
|
||||
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
|
||||
|
||||
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||
the code there did catch the loop.
|
||||
|
||||
|
||||
Version 10.10 06-March-2015
|
||||
---------------------------
|
||||
|
|
|
@ -3557,14 +3557,14 @@ unit widths are that the pointers to the subject, the most recent MARK, and a
|
|||
callout argument string point to strings of the appropriate width. Casts can be
|
||||
used to deal with this.
|
||||
|
||||
Argument:
|
||||
Argument:
|
||||
cb pointer to enumerate block
|
||||
callout_data user data
|
||||
|
||||
Returns: 0
|
||||
Returns: 0
|
||||
*/
|
||||
|
||||
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
|
||||
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
|
||||
void *callout_data)
|
||||
{
|
||||
uint32_t i;
|
||||
|
@ -3587,13 +3587,13 @@ if (cb->callout_string != NULL)
|
|||
}
|
||||
fprintf(outfile, "%c ", delimiter);
|
||||
}
|
||||
else fprintf(outfile, "%d ", cb->callout_number);
|
||||
else fprintf(outfile, "%d ", cb->callout_number);
|
||||
|
||||
fprintf(outfile, "%.*s\n",
|
||||
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
|
||||
pbuffer8 + cb->pattern_position);
|
||||
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -3879,10 +3879,10 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
|
|||
int len;
|
||||
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
|
||||
if (errorcode < 0)
|
||||
{
|
||||
{
|
||||
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
|
||||
}
|
||||
}
|
||||
fprintf(outfile, "\n");
|
||||
return PR_SKIP;
|
||||
}
|
||||
|
@ -5684,20 +5684,20 @@ else for (gmatched = 0;; gmatched++)
|
|||
|
||||
ovector = FLD(match_data, ovector);
|
||||
|
||||
/* After the first time round a global loop, save the current ovector[0,1] so
|
||||
that we can check that they do change each time. Otherwise a matching bug
|
||||
that returns the same string causes an infinite loop. It has happened! */
|
||||
/* After the first time round a global loop, for a normal global (/g)
|
||||
iteration, save the current ovector[0,1] so that we can check that they do
|
||||
change each time. Otherwise a matching bug that returns the same string
|
||||
causes an infinite loop. It has happened! */
|
||||
|
||||
if (gmatched > 0)
|
||||
if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
|
||||
{
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
}
|
||||
|
||||
/* Set the variables on the first iteration, just to stop a compiler warning
|
||||
when ovecsave[] is referenced below. */
|
||||
/* For altglobal (or first time round the loop), set an "unset" value. */
|
||||
|
||||
else ovecsave[0] = ovecsave[1] = 0;
|
||||
else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
|
||||
|
||||
/* Fill the ovector with junk to detect elements that do not get set
|
||||
when they should be. */
|
||||
|
@ -6169,13 +6169,48 @@ else for (gmatched = 0;; gmatched++)
|
|||
if (end_offset == ulen) break; /* End of subject */
|
||||
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
}
|
||||
else g_notempty = 0;
|
||||
|
||||
/* For /g, update the start offset, leaving the rest alone */
|
||||
/* However, even after matching a non-empty string, there is still one
|
||||
tricky case. If a pattern contains \K within a lookbehind assertion at the
|
||||
start, the end of the matched string can be at the offset where the match
|
||||
started. In the case of a normal /g iteration without special action, this
|
||||
leads to a loop that keeps on returning the same substring. The loop would
|
||||
be caught above, but we really want to move on to the next match. */
|
||||
|
||||
if ((dat_datctl.control & CTL_GLOBAL) != 0) dat_datctl.offset = end_offset;
|
||||
else
|
||||
{
|
||||
g_notempty = 0; /* Set for a "normal" repeat */
|
||||
if ((dat_datctl.control & CTL_GLOBAL) != 0)
|
||||
{
|
||||
PCRE2_SIZE startchar;
|
||||
PCRE2_GET_STARTCHAR(startchar, match_data);
|
||||
if (end_offset <= startchar)
|
||||
{
|
||||
if (startchar >= ulen) break; /* End of subject */
|
||||
end_offset = startchar + 1;
|
||||
if (utf && test_mode != PCRE32_MODE)
|
||||
{
|
||||
if (test_mode == PCRE8_MODE)
|
||||
{
|
||||
for (; end_offset < ulen; end_offset++)
|
||||
if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
|
||||
}
|
||||
else /* 16-bit mode */
|
||||
{
|
||||
for (; end_offset < ulen; end_offset++)
|
||||
if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* For /G, update the pointer and length */
|
||||
/* For /g (global), update the start offset, leaving the rest alone. */
|
||||
|
||||
if ((dat_datctl.control & CTL_GLOBAL) != 0)
|
||||
dat_datctl.offset = end_offset;
|
||||
|
||||
/* For altglobal, just update the pointer and length. */
|
||||
|
||||
else
|
||||
{
|
||||
|
|
|
@ -4255,4 +4255,12 @@ a random value. /Ix
|
|||
|
||||
";(?<=()((?3))((?2)))"
|
||||
|
||||
# Perl loops on this (PCRE2 used to!)
|
||||
|
||||
/(?<=\Ka)/g,aftertext
|
||||
aaaaa
|
||||
|
||||
/(?<=\Ka)/altglobal,aftertext
|
||||
aaaaa
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -1641,4 +1641,10 @@
|
|||
/[A-`]/iB,utf
|
||||
abcdefghijklmno
|
||||
|
||||
/(?<=\K\x{17f})/g,utf,aftertext
|
||||
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||
|
||||
/(?<=\K\x{17f})/altglobal,utf,aftertext
|
||||
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -14260,4 +14260,32 @@ Failed: error 115 at offset 15: reference to non-existent subpattern
|
|||
";(?<=()((?3))((?2)))"
|
||||
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
|
||||
|
||||
# Perl loops on this (PCRE2 used to!)
|
||||
|
||||
/(?<=\Ka)/g,aftertext
|
||||
aaaaa
|
||||
0: a
|
||||
0+ aaaa
|
||||
0: a
|
||||
0+ aaa
|
||||
0: a
|
||||
0+ aa
|
||||
0: a
|
||||
0+ a
|
||||
0: a
|
||||
0+
|
||||
|
||||
/(?<=\Ka)/altglobal,aftertext
|
||||
aaaaa
|
||||
0: a
|
||||
0+ aaaa
|
||||
0: a
|
||||
0+ aaa
|
||||
0: a
|
||||
0+ aa
|
||||
0: a
|
||||
0+ a
|
||||
0: a
|
||||
0+
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4019,4 +4019,30 @@ Failed: error 140 at offset 11: recursion could loop indefinitely
|
|||
abcdefghijklmno
|
||||
0: a
|
||||
|
||||
/(?<=\K\x{17f})/g,utf,aftertext
|
||||
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}
|
||||
0: \x{17f}
|
||||
0+
|
||||
|
||||
/(?<=\K\x{17f})/altglobal,utf,aftertext
|
||||
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}\x{17f}
|
||||
0: \x{17f}
|
||||
0+ \x{17f}
|
||||
0: \x{17f}
|
||||
0+
|
||||
|
||||
# End of testinput5
|
||||
|
|
Loading…
Reference in New Issue