Fix bad interaction between -o and -M in pcre2grep.

This commit is contained in:
Philip.Hazel 2016-06-17 17:37:26 +00:00
parent 4f238daf41
commit b002f44061
5 changed files with 91 additions and 23 deletions

View File

@ -147,6 +147,10 @@ checked in RunTest.
pcre2_match.c, in anticipation that this is needed for the same reason it was pcre2_match.c, in anticipation that this is needed for the same reason it was
recently added to pcrecpp.cc in PCRE1. recently added to pcrecpp.cc in PCRE1.
38. Using -o with -M in pcre2grep could cause unnecessary repeated output when
the match extended over a line boundary: pcre2grep tried to find more matches
"on the same line" even though the match had already passed the end of that line.
Version 10.21 12-January-2016 Version 10.21 12-January-2016
----------------------------- -----------------------------

View File

@ -556,6 +556,18 @@ echo "---------------------------- Test 109 -----------------------------" >>tes
(cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep
echo "RC=$?" >>testtrygrep echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 110 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 111 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 112 -----------------------------" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results. # Now compare the results.
$cf $srcdir/testdata/grepoutput testtrygrep $cf $srcdir/testdata/grepoutput testtrygrep

View File

@ -1886,7 +1886,7 @@ while (ptr < endptr)
size_t startoffset = 0; size_t startoffset = 0;
/* At this point, ptr is at the start of a line. We need to find the length /* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre_exec(). In multiline mode, it is the of the subject string to pass to pcre2_match(). In multiline mode, it is the
length of the remainder of the data in the buffer. Otherwise, it is the length of length of the remainder of the data in the buffer. Otherwise, it is the length of
the next line, excluding the terminating newline. After matching, we always the next line, excluding the terminating newline. After matching, we always
advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
@ -1977,7 +1977,7 @@ while (ptr < endptr)
match = match_patterns(matchptr, length, options, startoffset, &mrc); match = match_patterns(matchptr, length, options, startoffset, &mrc);
options = PCRE2_NOTEMPTY; options = PCRE2_NOTEMPTY;
/* If it's a match or a not-match (as required), do what's wanted. */ /* If it's a match or a not-match (as required), do what's wanted. */
if (match != invert) if (match != invert)
@ -2074,14 +2074,22 @@ while (ptr < endptr)
fprintf(stdout, STDOUT_NL); fprintf(stdout, STDOUT_NL);
} }
/* Prepare to repeat to find the next match. If the pattern contained a /* Prepare to repeat to find the next match in the line. */
lookbehind that included \K, it is possible that the end of the match
might be at or before the actual starting offset we have just used. In
this case, start one character further on. */
match = FALSE; match = FALSE;
if (line_buffered) fflush(stdout); if (line_buffered) fflush(stdout);
rc = 0; /* Had some success */ rc = 0; /* Had some success */
/* If the current match ended past the end of the line (only possible
in multiline mode), we are done with this line. */
if (offsets[1] > linelength) goto END_ONE_MATCH;
/* If the pattern contained a lookbehind that included \K, it is
possible that the end of the match might be at or before the actual
starting offset we have just used. In this case, start one character
further on. */
startoffset = offsets[1]; /* Restart after the match */ startoffset = offsets[1]; /* Restart after the match */
oldstartoffset = pcre2_get_startchar(match_data); oldstartoffset = pcre2_get_startchar(match_data);
if (startoffset <= oldstartoffset) if (startoffset <= oldstartoffset)
@ -2786,24 +2794,24 @@ if ((popts & PO_FIXED_STRINGS) != 0)
} }
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
compile_context); &erroffset, compile_context);
/* Handle successful compile */ /* Handle successful compile */
if (p->compiled != NULL) if (p->compiled != NULL)
{ {
#ifdef SUPPORT_PCRE2GREP_JIT #ifdef SUPPORT_PCRE2GREP_JIT
if (use_jit) if (use_jit)
{ {
errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
if (errcode == 0) return TRUE; if (errcode == 0) return TRUE;
erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */
} }
else else
#endif #endif
return TRUE; return TRUE;
} }
/* Handle compile and JIT compile errors */ /* Handle compile and JIT compile errors */

13
testdata/grepinput vendored
View File

@ -604,6 +604,19 @@ AB.VE the turtle
010203040506 010203040506
match 1:
a
match 2:
b
match 3:
c
match 4:
d
match 5:
e
Rhubarb
Custard Tart
PUT NEW DATA ABOVE THIS LINE. PUT NEW DATA ABOVE THIS LINE.
============================= =============================

49
testdata/grepoutput vendored
View File

@ -10,7 +10,7 @@ RC=0
7:PATTERN at the start of a line. 7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears. 8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case. 10:This pattern is in lower case.
610:Check up on PATTERN near the end. 623:Check up on PATTERN near the end.
RC=0 RC=0
---------------------------- Test 4 ------------------------------ ---------------------------- Test 4 ------------------------------
4 4
@ -19,7 +19,7 @@ RC=0
./testdata/grepinput:7:PATTERN at the start of a line. ./testdata/grepinput:7:PATTERN at the start of a line.
./testdata/grepinput:8:In the middle of a line, PATTERN appears. ./testdata/grepinput:8:In the middle of a line, PATTERN appears.
./testdata/grepinput:10:This pattern is in lower case. ./testdata/grepinput:10:This pattern is in lower case.
./testdata/grepinput:610:Check up on PATTERN near the end. ./testdata/grepinput:623:Check up on PATTERN near the end.
./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx:5:Pattern ./testdata/grepinputx:5:Pattern
./testdata/grepinputx:42:This line contains pattern not on a line by itself. ./testdata/grepinputx:42:This line contains pattern not on a line by itself.
@ -28,7 +28,7 @@ RC=0
7:PATTERN at the start of a line. 7:PATTERN at the start of a line.
8:In the middle of a line, PATTERN appears. 8:In the middle of a line, PATTERN appears.
10:This pattern is in lower case. 10:This pattern is in lower case.
610:Check up on PATTERN near the end. 623:Check up on PATTERN near the end.
3:Here is the pattern again. 3:Here is the pattern again.
5:Pattern 5:Pattern
42:This line contains pattern not on a line by itself. 42:This line contains pattern not on a line by itself.
@ -324,10 +324,10 @@ RC=0
./testdata/grepinput-9- ./testdata/grepinput-9-
./testdata/grepinput:10:This pattern is in lower case. ./testdata/grepinput:10:This pattern is in lower case.
-- --
./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE. ./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE.
./testdata/grepinput-608-============================= ./testdata/grepinput-621-=============================
./testdata/grepinput-609- ./testdata/grepinput-622-
./testdata/grepinput:610:Check up on PATTERN near the end. ./testdata/grepinput:623:Check up on PATTERN near the end.
-- --
./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests.
./testdata/grepinputx-2- ./testdata/grepinputx-2-
@ -349,8 +349,8 @@ RC=0
./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long. ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long.
./testdata/grepinput-13- ./testdata/grepinput-13-
-- --
./testdata/grepinput:610:Check up on PATTERN near the end. ./testdata/grepinput:623:Check up on PATTERN near the end.
./testdata/grepinput-611-This is the last line of this file. ./testdata/grepinput-624-This is the last line of this file.
-- --
./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:3:Here is the pattern again.
./testdata/grepinputx-4- ./testdata/grepinputx-4-
@ -756,3 +756,34 @@ RC=0
RC=0 RC=0
---------------------------- Test 109 ----------------------------- ---------------------------- Test 109 -----------------------------
RC=0 RC=0
---------------------------- Test 110 -----------------------------
match 1:
a
/1/a
match 2:
b
/2/b
match 3:
c
/3/c
match 4:
d
/4/d
match 5:
e
/5/e
RC=0
---------------------------- Test 111 -----------------------------
607:0,12
609:0,12
611:0,12
613:0,12
615:0,12
RC=0
---------------------------- Test 112 -----------------------------
37168,12
37180,12
37192,12
37204,12
37216,12
RC=0