Fix pcre2grep loop provoked by \K in a lookbehind assertion in a non-anchored pattern.
This commit is contained in:
Philip.Hazel 2015-04-03 11:14:19 +00:00
parent ade67323e7
commit cda8384a95
6 changed files with 49 additions and 5 deletions

View File

@ -27,7 +27,7 @@ error about an unsupported item.
8. For some types of pattern, for example /Z*(|d*){216}/, the auto-
possessification code could take exponential time to complete. A recursion
depth limit of 10000 has been imposed to limit the resources used by this
depth limit of 1000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.
9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
@ -67,6 +67,9 @@ have either been changed to uint32_t or commented as "must be signed".
caused a stack overflow instead of the diagnosis of a non-fixed length
lookbehind assertion. This bug was discovered by the LLVM fuzzer.
17. The use of \K in a positive lookbehind assertion in a non-anchored pattern
(e.g. /(?<=\Ka)/) could make pcre2grep loop.
Version 10.10 06-March-2015
---------------------------

View File

@ -524,6 +524,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes
(cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 107 -----------------------------" >>testtrygrep
echo "a" >testtemp1grep
echo "aaaaa" >>testtemp1grep
(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1
echo "RC=$?" >>testtrygrep
# Now compare the results.
@ -544,6 +549,10 @@ if [ $utf8 -ne 0 ] ; then
(cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test U3 ------------------------------" >>testtrygrep
(cd $srcdir; $valgrind $pcre2grep --line-offsets -u '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep
echo "RC=$?" >>testtrygrep
$cf $srcdir/testdata/grepoutput8 testtrygrep
if [ $? != 0 ] ; then exit 1; fi

View File

@ -1433,7 +1433,7 @@ Returns: TRUE if there was a match
static BOOL
match_patterns(char *matchptr, size_t length, unsigned int options,
int startoffset, int *mrc)
size_t startoffset, int *mrc)
{
int i;
size_t slen = length;
@ -1581,12 +1581,12 @@ while (ptr < endptr)
{
int endlinelength;
int mrc = 0;
int startoffset = 0;
unsigned int options = 0;
BOOL match;
char *matchptr = ptr;
char *t = ptr;
size_t length, linelength;
size_t startoffset = 0;
/* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre2_match(). In multiline mode, it is the
@ -1729,6 +1729,8 @@ while (ptr < endptr)
{
if (!invert)
{
size_t oldstartoffset;
if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%d:", linenumber);
@ -1772,12 +1774,23 @@ while (ptr < endptr)
if (printed || printname != NULL || number) fprintf(stdout, "\n");
}
/* Prepare to repeat to find the next match */
/* Prepare to repeat to find the next match. If the pattern contained a
lookbehind that included \K, it is possible that the end of the match
might be at or before the actual starting offset we have just used. In
this case, start one character further on. */
match = FALSE;
if (line_buffered) fflush(stdout);
rc = 0; /* Had some success */
startoffset = offsets[1]; /* Restart after the match */
oldstartoffset = pcre2_get_startchar(match_data);
if (startoffset <= oldstartoffset)
{
if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
startoffset = oldstartoffset + 1;
if (utf)
while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
}
goto ONLY_MATCHING_RESTART;
}
}
@ -1917,7 +1930,7 @@ while (ptr < endptr)
for (;;)
{
startoffset = offsets[1];
if (startoffset >= (int)linelength + endlinelength ||
if (startoffset >= linelength + endlinelength ||
!match_patterns(matchptr, length, options, startoffset, &mrc))
break;
FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
@ -1973,6 +1986,7 @@ while (ptr < endptr)
/* Advance to after the newline and increment the line number. The file
offset to the current line is maintained in filepos. */
END_ONE_MATCH:
ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength);
linenumber++;

2
testdata/grepinput8 vendored
View File

@ -9,3 +9,5 @@ After 111
After 222After 333
And so on and so on
And so on and so on
ſ
ſſſſſ

8
testdata/grepoutput vendored
View File

@ -743,3 +743,11 @@ RC=0
---------------------------- Test 106 -----------------------------
a
RC=0
---------------------------- Test 107 -----------------------------
1:0,1
2:0,1
2:1,1
2:2,1
2:3,1
2:4,1
RC=0

View File

@ -10,3 +10,11 @@ RC=0
16-After 111
17-After 22218-After 333
RC=0
---------------------------- Test U3 ------------------------------
12:0,2
13:0,2
13:2,2
13:4,2
13:6,2
13:8,2
RC=0