Fix handling of global matching in pcre2test when a lookbehind assertion
contains \K.
This commit is contained in:
parent
cda8384a95
commit
7105d249f6
|
@ -70,6 +70,9 @@ lookbehind assertion. This bug was discovered by the LLVM fuzzer.
|
||||||
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
|
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
|
||||||
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
|
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
|
||||||
|
|
||||||
|
18. There was a similar problem to 17 in pcre2test for global matches, though
|
||||||
|
the code there did catch the loop rather than advancing to the next match.
|
||||||
|
|
||||||
|
|
||||||
Version 10.10 06-March-2015
|
Version 10.10 06-March-2015
|
||||||
---------------------------
|
---------------------------
|
||||||
|
|
|
@ -3557,14 +3557,14 @@ unit widths are that the pointers to the subject, the most recent MARK, and a
|
||||||
callout argument string point to strings of the appropriate width. Casts can be
|
callout argument string point to strings of the appropriate width. Casts can be
|
||||||
used to deal with this.
|
used to deal with this.
|
||||||
|
|
||||||
Argument:
|
Argument:
|
||||||
cb pointer to enumerate block
|
cb pointer to enumerate block
|
||||||
callout_data user data
|
callout_data user data
|
||||||
|
|
||||||
Returns: 0
|
Returns: 0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
|
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
|
||||||
void *callout_data)
|
void *callout_data)
|
||||||
{
|
{
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
@ -3587,13 +3587,13 @@ if (cb->callout_string != NULL)
|
||||||
}
|
}
|
||||||
fprintf(outfile, "%c ", delimiter);
|
fprintf(outfile, "%c ", delimiter);
|
||||||
}
|
}
|
||||||
else fprintf(outfile, "%d ", cb->callout_number);
|
else fprintf(outfile, "%d ", cb->callout_number);
|
||||||
|
|
||||||
fprintf(outfile, "%.*s\n",
|
fprintf(outfile, "%.*s\n",
|
||||||
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
|
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
|
||||||
pbuffer8 + cb->pattern_position);
|
pbuffer8 + cb->pattern_position);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -3879,10 +3879,10 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
|
||||||
int len;
|
int len;
|
||||||
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
|
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
|
||||||
if (errorcode < 0)
|
if (errorcode < 0)
|
||||||
{
|
{
|
||||||
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
|
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
|
||||||
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
|
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
|
||||||
}
|
}
|
||||||
fprintf(outfile, "\n");
|
fprintf(outfile, "\n");
|
||||||
return PR_SKIP;
|
return PR_SKIP;
|
||||||
}
|
}
|
||||||
|
@ -5684,20 +5684,20 @@ else for (gmatched = 0;; gmatched++)
|
||||||
|
|
||||||
ovector = FLD(match_data, ovector);
|
ovector = FLD(match_data, ovector);
|
||||||
|
|
||||||
/* After the first time round a global loop, save the current ovector[0,1] so
|
/* After the first time round a global loop, for a normal global (/g)
|
||||||
that we can check that they do change each time. Otherwise a matching bug
|
iteration, save the current ovector[0,1] so that we can check that they do
|
||||||
that returns the same string causes an infinite loop. It has happened! */
|
change each time. Otherwise a matching bug that returns the same string
|
||||||
|
causes an infinite loop. It has happened! */
|
||||||
|
|
||||||
if (gmatched > 0)
|
if (gmatched > 0 && (dat_datctl.control & CTL_GLOBAL) != 0)
|
||||||
{
|
{
|
||||||
ovecsave[0] = ovector[0];
|
ovecsave[0] = ovector[0];
|
||||||
ovecsave[1] = ovector[1];
|
ovecsave[1] = ovector[1];
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the variables on the first iteration, just to stop a compiler warning
|
/* For altglobal (or first time round the loop), set an "unset" value. */
|
||||||
when ovecsave[] is referenced below. */
|
|
||||||
|
|
||||||
else ovecsave[0] = ovecsave[1] = 0;
|
else ovecsave[0] = ovecsave[1] = PCRE2_UNSET;
|
||||||
|
|
||||||
/* Fill the ovector with junk to detect elements that do not get set
|
/* Fill the ovector with junk to detect elements that do not get set
|
||||||
when they should be. */
|
when they should be. */
|
||||||
|
@ -6169,13 +6169,48 @@ else for (gmatched = 0;; gmatched++)
|
||||||
if (end_offset == ulen) break; /* End of subject */
|
if (end_offset == ulen) break; /* End of subject */
|
||||||
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
g_notempty = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||||
}
|
}
|
||||||
else g_notempty = 0;
|
|
||||||
|
|
||||||
/* For /g, update the start offset, leaving the rest alone */
|
/* However, even after matching a non-empty string, there is still one
|
||||||
|
tricky case. If a pattern contains \K within a lookbehind assertion at the
|
||||||
|
start, the end of the matched string can be at the offset where the match
|
||||||
|
started. In the case of a normal /g iteration without special action, this
|
||||||
|
leads to a loop that keeps on returning the same substring. The loop would
|
||||||
|
be caught above, but we really want to move on to the next match. */
|
||||||
|
|
||||||
if ((dat_datctl.control & CTL_GLOBAL) != 0) dat_datctl.offset = end_offset;
|
else
|
||||||
|
{
|
||||||
|
g_notempty = 0; /* Set for a "normal" repeat */
|
||||||
|
if ((dat_datctl.control & CTL_GLOBAL) != 0)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE startchar;
|
||||||
|
PCRE2_GET_STARTCHAR(startchar, match_data);
|
||||||
|
if (end_offset <= startchar)
|
||||||
|
{
|
||||||
|
if (startchar >= ulen) break; /* End of subject */
|
||||||
|
end_offset = startchar + 1;
|
||||||
|
if (utf && test_mode != PCRE32_MODE)
|
||||||
|
{
|
||||||
|
if (test_mode == PCRE8_MODE)
|
||||||
|
{
|
||||||
|
for (; end_offset < ulen; end_offset++)
|
||||||
|
if ((((PCRE2_SPTR8)pp)[end_offset] & 0xc0) != 0x80) break;
|
||||||
|
}
|
||||||
|
else /* 16-bit mode */
|
||||||
|
{
|
||||||
|
for (; end_offset < ulen; end_offset++)
|
||||||
|
if ((((PCRE2_SPTR16)pp)[end_offset] & 0xfc00) != 0xdc00) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* For /G, update the pointer and length */
|
/* For /g (global), update the start offset, leaving the rest alone. */
|
||||||
|
|
||||||
|
if ((dat_datctl.control & CTL_GLOBAL) != 0)
|
||||||
|
dat_datctl.offset = end_offset;
|
||||||
|
|
||||||
|
/* For altglobal, just update the pointer and length. */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -4255,4 +4255,12 @@ a random value. /Ix
|
||||||
|
|
||||||
";(?<=()((?3))((?2)))"
|
";(?<=()((?3))((?2)))"
|
||||||
|
|
||||||
|
# Perl loops on this (PCRE2 used to!)
|
||||||
|
|
||||||
|
/(?<=\Ka)/g,aftertext
|
||||||
|
aaaaa
|
||||||
|
|
||||||
|
/(?<=\Ka)/altglobal,aftertext
|
||||||
|
aaaaa
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1641,4 +1641,10 @@
|
||||||
/[A-`]/iB,utf
|
/[A-`]/iB,utf
|
||||||
abcdefghijklmno
|
abcdefghijklmno
|
||||||
|
|
||||||
|
/(?<=\K\x{17f})/g,utf,aftertext
|
||||||
|
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
|
||||||
|
/(?<=\K\x{17f})/altglobal,utf,aftertext
|
||||||
|
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -14260,4 +14260,32 @@ Failed: error 115 at offset 15: reference to non-existent subpattern
|
||||||
";(?<=()((?3))((?2)))"
|
";(?<=()((?3))((?2)))"
|
||||||
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
|
Failed: error 125 at offset 20: lookbehind assertion is not fixed length
|
||||||
|
|
||||||
|
# Perl loops on this (PCRE2 used to!)
|
||||||
|
|
||||||
|
/(?<=\Ka)/g,aftertext
|
||||||
|
aaaaa
|
||||||
|
0: a
|
||||||
|
0+ aaaa
|
||||||
|
0: a
|
||||||
|
0+ aaa
|
||||||
|
0: a
|
||||||
|
0+ aa
|
||||||
|
0: a
|
||||||
|
0+ a
|
||||||
|
0: a
|
||||||
|
0+
|
||||||
|
|
||||||
|
/(?<=\Ka)/altglobal,aftertext
|
||||||
|
aaaaa
|
||||||
|
0: a
|
||||||
|
0+ aaaa
|
||||||
|
0: a
|
||||||
|
0+ aaa
|
||||||
|
0: a
|
||||||
|
0+ aa
|
||||||
|
0: a
|
||||||
|
0+ a
|
||||||
|
0: a
|
||||||
|
0+
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -4019,4 +4019,30 @@ Failed: error 140 at offset 11: recursion could loop indefinitely
|
||||||
abcdefghijklmno
|
abcdefghijklmno
|
||||||
0: a
|
0: a
|
||||||
|
|
||||||
|
/(?<=\K\x{17f})/g,utf,aftertext
|
||||||
|
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+
|
||||||
|
|
||||||
|
/(?<=\K\x{17f})/altglobal,utf,aftertext
|
||||||
|
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}\x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+ \x{17f}
|
||||||
|
0: \x{17f}
|
||||||
|
0+
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
Loading…
Reference in New Issue