Fix multiple multiline matching issues in pcre2grep.

This commit is contained in:
Philip.Hazel 2017-11-13 16:52:39 +00:00
parent 1cc5351f6c
commit 76814d92c7
5 changed files with 177 additions and 42 deletions

View File

@ -45,6 +45,12 @@ misbehaviour for subsequent references to groups that started outside the
recursion. ACCEPT in an assertion now closes only those groups that were recursion. ACCEPT in an assertion now closes only those groups that were
started within that assertion. Fixes oss-fuzz issues 3852 and 3891. started within that assertion. Fixes oss-fuzz issues 3852 and 3891.
13. Multiline matching in pcre2grep misbehaved when the pattern matched within
a line and then matched again starting at the end of that line and running over
into subsequent lines. The behaviour differed with and without colouring, and
sometimes context lines were incorrectly printed and/or line endings were lost.
All these issues should now be fixed.
Version 10.30 14-August-2017 Version 10.30 14-August-2017
---------------------------- ----------------------------

View File

@ -248,7 +248,7 @@ echo "---------------------------- Test 35 -----------------------------" >>test
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 36 -----------------------------" >>testtrygrep echo "---------------------------- Test 36 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude 'grepinput$' --exclude=grepinput8 --exclude=grepinputM --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 37 -----------------------------" >>testtrygrep echo "---------------------------- Test 37 -----------------------------" >>testtrygrep
@ -391,6 +391,12 @@ echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 70 -----------------------------" >>testtrygrep echo "---------------------------- Test 70 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --color=always -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -M "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -M -n "triple:\t.*\n\n" ./testdata/grepinput3) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 71 -----------------------------" >>testtrygrep echo "---------------------------- Test 71 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -o "^01|^02|^03" ./testdata/grepinput) >>testtrygrep
@ -494,25 +500,25 @@ echo "---------------------------- Test 95 -----------------------------" >>test
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 96 -----------------------------" >>testtrygrep echo "---------------------------- Test 96 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' 'fox' ./test* | sort) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include-dir=testdata --exclude '^(?!grepinput)' --exclude=grepinputM 'fox' ./test* | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 97 -----------------------------" >>testtrygrep echo "---------------------------- Test 97 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include=grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 98 -----------------------------" >>testtrygrep echo "---------------------------- Test 98 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep echo "grepinput$" >testtemp1grep
echo "grepinput8" >>testtemp1grep echo "grepinput8" >>testtemp1grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -L -r --exclude=grepinput3 --exclude=grepinputM --include=grepinput --exclude-from $builddir/testtemp1grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 99 -----------------------------" >>testtrygrep echo "---------------------------- Test 99 -----------------------------" >>testtrygrep
echo "grepinput$" >testtemp1grep echo "grepinput$" >testtemp1grep
echo "grepinput8" >testtemp2grep echo "grepinput8" >testtemp2grep
(cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -L -r --include grepinput --exclude=grepinputM --exclude-from $builddir/testtemp1grep --exclude-from=$builddir/testtemp2grep --exclude-dir='^\.' 'fox' ./testdata | sort) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 100 ------------------------------" >>testtrygrep echo "---------------------------- Test 100 ------------------------------" >>testtrygrep
@ -582,7 +588,7 @@ echo "---------------------------- Test 115 -----------------------------" >>tes
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 116 -----------------------------" >>testtrygrep echo "---------------------------- Test 116 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -th 'the' testdata/grepinput*) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep --exclude=grepinputM -th 'the' testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 117 -----------------------------" >>testtrygrep echo "---------------------------- Test 117 -----------------------------" >>testtrygrep
@ -610,10 +616,20 @@ echo "---------------------------- Test 122 -----------------------------" >>tes
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -w 'cat|dog' testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 122 -----------------------------" >>testtrygrep echo "---------------------------- Test 123 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -w 'dog|cat' testdata/grepinputv) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 124 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn --colour=always -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results. # Now compare the results.

View File

@ -2505,7 +2505,10 @@ while (ptr < endptr)
match = match_patterns(ptr, length, options, startoffset, &mrc); match = match_patterns(ptr, length, options, startoffset, &mrc);
options = PCRE2_NOTEMPTY; options = PCRE2_NOTEMPTY;
/* If it's a match or a not-match (as required), do what's wanted. */ /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
return code - to output data lines, so that binary zeroes are treated as just
another data character. */
if (match != invert) if (match != invert)
{ {
@ -2734,27 +2737,6 @@ while (ptr < endptr)
if (printname != NULL) fprintf(stdout, "%s:", printname); if (printname != NULL) fprintf(stdout, "%s:", printname);
if (number) fprintf(stdout, "%d:", linenumber); if (number) fprintf(stdout, "%d:", linenumber);
/* In multiline mode, we want to print to the end of the line in which
the end of the matched string is found, so we adjust linelength and the
line number appropriately, but only when there actually was a match
(invert not set). Because the PCRE2_FIRSTLINE option is set, the start of
the match will always be before the first newline sequence. */
if (multiline & !invert)
{
char *endmatch = ptr + offsets[1];
t = ptr;
while (t <= endmatch)
{
t = end_of_line(t, endptr, &endlinelength);
if (t < endmatch) linenumber++; else break;
}
linelength = t - ptr - endlinelength;
}
/*** NOTE: Use only fwrite() to output the data line, so that binary
zeroes are treated as just another data character. */
/* This extra option, for Jeffrey Friedl's debugging requirements, /* This extra option, for Jeffrey Friedl's debugging requirements,
replaces the matched string, or a specific captured string if it exists, replaces the matched string, or a specific captured string if it exists,
with X. When this happens, colouring is ignored. */ with X. When this happens, colouring is ignored. */
@ -2771,20 +2753,48 @@ while (ptr < endptr)
else else
#endif #endif
/* We have to split the line(s) up if colouring, and search for further /* In multiline mode, or if colouring, we have to split the line(s) up
matches, but not of course if the line is a non-match. */ and search for further matches, but not of course if the line is a
non-match. In multiline mode this is necessary in case there is another
match that spans the end of the current line. When colouring we want to
colour all matches. */
if (do_colour && !invert) if ((multiline || do_colour) && !invert)
{ {
int plength; int plength;
FWRITE_IGNORE(ptr, 1, offsets[0], stdout); FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]); print_match(ptr + offsets[0], offsets[1] - offsets[0]);
for (;;) for (;;)
{ {
startoffset = offsets[1]; startoffset = offsets[1]; /* Advance after previous match. */
if (startoffset >= linelength + endlinelength ||
!match_patterns(ptr, length, options, startoffset, &mrc)) /* If the current match ended past the end of the line (only possible
break; in multiline mode), we must move on to the line in which it did end
before searching for more matches. Because the PCRE2_FIRSTLINE option
is set, the start of the match will always be before the first
newline sequence. */
while (startoffset > linelength + endlinelength)
{
ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength);
linenumber++;
startoffset -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
length = (size_t)(endptr - ptr);
}
/* If startoffset is at the exact end of the line it means this
complete line was the final part of the match, so there is nothing
more to do. */
if (startoffset == linelength + endlinelength) break;
/* Otherwise, run a match from within the final line, and if found,
loop for any that may follow. */
if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout); FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]); print_match(ptr + offsets[0], offsets[1] - offsets[0]);
} }
@ -2797,7 +2807,7 @@ while (ptr < endptr)
if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout); if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
} }
/* Not colouring; no need to search for further matches */ /* Not colouring or multiline; no need to search for further matches. */
else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout); else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
} }

17
testdata/grepinputM vendored Normal file
View File

@ -0,0 +1,17 @@
Data file for multiline tests of multiple matches.
start end in between start
end and following
Other stuff
start end in between start
end and following start
end other stuff
start end in between start
end
** These two lines must be last.
start end in between start
end

96
testdata/grepoutput vendored
View File

@ -487,6 +487,7 @@ RC=0
./testdata/grepinput:456 ./testdata/grepinput:456
./testdata/grepinput3:0 ./testdata/grepinput3:0
./testdata/grepinput8:0 ./testdata/grepinput8:0
./testdata/grepinputM:0
./testdata/grepinputv:1 ./testdata/grepinputv:1
./testdata/grepinputx:0 ./testdata/grepinputx:0
RC=0 RC=0
@ -600,6 +601,33 @@ RC=0
triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
RC=0 RC=0
1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
RC=0
triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
RC=0
1:triple: t1_txt s1_tag s_txt p_tag p_txt o_tag o_txt
6:triple: t3_txt s2_tag s_txt p_tag p_txt o_tag o_txt
8:triple: t4_txt s1_tag s_txt p_tag p_txt o_tag o_txt
13:triple: t6_txt s2_tag s_txt p_tag p_txt o_tag o_txt
RC=0
---------------------------- Test 71 ----------------------------- ---------------------------- Test 71 -----------------------------
01 01
RC=0 RC=0
@ -793,21 +821,23 @@ RC=0
37216,12 37216,12
RC=0 RC=0
---------------------------- Test 113 ----------------------------- ---------------------------- Test 113 -----------------------------
478 480
RC=0 RC=0
---------------------------- Test 114 ----------------------------- ---------------------------- Test 114 -----------------------------
testdata/grepinput:469 testdata/grepinput:469
testdata/grepinput3:0 testdata/grepinput3:0
testdata/grepinput8:0 testdata/grepinput8:0
testdata/grepinputM:2
testdata/grepinputv:3 testdata/grepinputv:3
testdata/grepinputx:6 testdata/grepinputx:6
TOTAL:478 TOTAL:480
RC=0 RC=0
---------------------------- Test 115 ----------------------------- ---------------------------- Test 115 -----------------------------
testdata/grepinput:469 testdata/grepinput:469
testdata/grepinputM:2
testdata/grepinputv:3 testdata/grepinputv:3
testdata/grepinputx:6 testdata/grepinputx:6
TOTAL:478 TOTAL:480
RC=0 RC=0
---------------------------- Test 116 ----------------------------- ---------------------------- Test 116 -----------------------------
478 478
@ -816,9 +846,10 @@ RC=0
469 469
0 0
0 0
2
3 3
6 6
478 480
RC=0 RC=0
---------------------------- Test 118 ----------------------------- ---------------------------- Test 118 -----------------------------
testdata/grepinput3 testdata/grepinput3
@ -846,7 +877,62 @@ RC=0
over the lazy dog. over the lazy dog.
The word is cat in this line The word is cat in this line
RC=0 RC=0
---------------------------- Test 122 ----------------------------- ---------------------------- Test 123 -----------------------------
over the lazy dog. over the lazy dog.
The word is cat in this line The word is cat in this line
RC=0 RC=0
---------------------------- Test 124 -----------------------------
3:start end in between start
end and following
7:start end in between start
end and following start
end other stuff
11:start end in between start
end
16:start end in between start
end
RC=0
3:start end in between start
end and following
5-Other stuff
6-
7:start end in between start
end and following start
end other stuff
10-
11:start end in between start
end
14-
15-** These two lines must be last.
16:start end in between start
end
RC=0
3:start end in between start
end and following
7:start end in between start
end and following start
end other stuff
11:start end in between start
end
16:start end in between start
end
RC=0
3:start end in between start
end and following
5-Other stuff
6-
7:start end in between start
end and following start
end other stuff
10-
11:start end in between start
end
14-
15-** These two lines must be last.
16:start end in between start
end
RC=0