diff --git a/ChangeLog b/ChangeLog index 819e7ce..de7dc9d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -147,6 +147,10 @@ checked in RunTest. pcre2_match.c, in anticipation that this is needed for the same reason it was recently added to pcrecpp.cc in PCRE1. +38. Using -o with -M in pcre2grep could cause unnecessary repeated output when +the match extended over a line boundary, as it tried to find more matches "on +the same line" - but it was already over the end. + Version 10.21 12-January-2016 ----------------------------- diff --git a/RunGrepTest b/RunGrepTest index 9ba4bd0..0e80d06 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -556,6 +556,18 @@ echo "---------------------------- Test 109 -----------------------------" >>tes (cd $srcdir; $valgrind $vjs $pcre2grep -cq lazy ./testdata/grepinput*) >>testtrygrep echo "RC=$?" >>testtrygrep +echo "---------------------------- Test 110 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --om-separator / -Mo0 -o1 -o2 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 111 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --line-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 112 -----------------------------" >>testtrygrep +(cd $srcdir; $valgrind $vjs $pcre2grep --file-offsets -M 'match (\d+):\n (.)\n' testdata/grepinput) >>testtrygrep +echo "RC=$?" >>testtrygrep + # Now compare the results. $cf $srcdir/testdata/grepoutput testtrygrep diff --git a/src/pcre2grep.c b/src/pcre2grep.c index 60b6812..2079faa 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -1886,7 +1886,7 @@ while (ptr < endptr) size_t startoffset = 0; /* At this point, ptr is at the start of a line. We need to find the length - of the subject string to pass to pcre_exec(). In multiline mode, it is the + of the subject string to pass to pcre2_match(). In multiline mode, it is the length remainder of the data in the buffer. Otherwise, it is the length of the next line, excluding the terminating newline. After matching, we always advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE @@ -1977,7 +1977,7 @@ while (ptr < endptr) match = match_patterns(matchptr, length, options, startoffset, &mrc); options = PCRE2_NOTEMPTY; - + /* If it's a match or a not-match (as required), do what's wanted. */ if (match != invert) @@ -2074,14 +2074,22 @@ while (ptr < endptr) fprintf(stdout, STDOUT_NL); } - /* Prepare to repeat to find the next match. If the pattern contained a - lookbehind that included \K, it is possible that the end of the match - might be at or before the actual starting offset we have just used. In - this case, start one character further on. */ + /* Prepare to repeat to find the next match in the line. */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ + + /* If the current match ended past the end of the line (only possible + in multiline mode), we are done with this line. */ + + if (offsets[1] > linelength) goto END_ONE_MATCH; + + /* If the pattern contained a lookbehind that included \K, it is + possible that the end of the match might be at or before the actual + starting offset we have just used. In this case, start one character + further on. */ + startoffset = offsets[1]; /* Restart after the match */ oldstartoffset = pcre2_get_startchar(match_data); if (startoffset <= oldstartoffset) @@ -2786,24 +2794,24 @@ if ((popts & PO_FIXED_STRINGS) != 0) } sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); -p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, - compile_context); - +p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode, + &erroffset, compile_context); + /* Handle successful compile */ - -if (p->compiled != NULL) + +if (p->compiled != NULL) { #ifdef SUPPORT_PCRE2GREP_JIT if (use_jit) { errcode = pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE); if (errcode == 0) return TRUE; - erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ + erroffset = PCRE2_SIZE_MAX; /* Will get reduced to patlen below */ } - else + else #endif return TRUE; - } + } /* Handle compile and JIT compile errors */ diff --git a/testdata/grepinput b/testdata/grepinput index 0f00edd..b01643d 100644 --- a/testdata/grepinput +++ b/testdata/grepinput @@ -604,6 +604,19 @@ AB.VE the turtle 010203040506 +match 1: + a +match 2: + b +match 3: + c +match 4: + d +match 5: + e +Rhubarb +Custard Tart + PUT NEW DATA ABOVE THIS LINE. ============================= diff --git a/testdata/grepoutput b/testdata/grepoutput index de8b8c2..9d41817 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -10,7 +10,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -610:Check up on PATTERN near the end. +623:Check up on PATTERN near the end. RC=0 ---------------------------- Test 4 ------------------------------ 4 @@ -19,7 +19,7 @@ RC=0 ./testdata/grepinput:7:PATTERN at the start of a line. ./testdata/grepinput:8:In the middle of a line, PATTERN appears. ./testdata/grepinput:10:This pattern is in lower case. -./testdata/grepinput:610:Check up on PATTERN near the end. +./testdata/grepinput:623:Check up on PATTERN near the end. ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx:5:Pattern ./testdata/grepinputx:42:This line contains pattern not on a line by itself. @@ -28,7 +28,7 @@ RC=0 7:PATTERN at the start of a line. 8:In the middle of a line, PATTERN appears. 10:This pattern is in lower case. -610:Check up on PATTERN near the end. +623:Check up on PATTERN near the end. 3:Here is the pattern again. 5:Pattern 42:This line contains pattern not on a line by itself. @@ -324,10 +324,10 @@ RC=0 ./testdata/grepinput-9- ./testdata/grepinput:10:This pattern is in lower case. -- -./testdata/grepinput-607-PUT NEW DATA ABOVE THIS LINE. -./testdata/grepinput-608-============================= -./testdata/grepinput-609- -./testdata/grepinput:610:Check up on PATTERN near the end. +./testdata/grepinput-620-PUT NEW DATA ABOVE THIS LINE. +./testdata/grepinput-621-============================= +./testdata/grepinput-622- +./testdata/grepinput:623:Check up on PATTERN near the end. -- ./testdata/grepinputx-1-This is a second file of input for the pcregrep tests. ./testdata/grepinputx-2- @@ -349,8 +349,8 @@ RC=0 ./testdata/grepinput-12-Here follows a whole lot of stuff that makes the file over 24K long. ./testdata/grepinput-13- -- -./testdata/grepinput:610:Check up on PATTERN near the end. -./testdata/grepinput-611-This is the last line of this file. +./testdata/grepinput:623:Check up on PATTERN near the end. +./testdata/grepinput-624-This is the last line of this file. -- ./testdata/grepinputx:3:Here is the pattern again. ./testdata/grepinputx-4- @@ -756,3 +756,34 @@ RC=0 RC=0 ---------------------------- Test 109 ----------------------------- RC=0 +---------------------------- Test 110 ----------------------------- +match 1: + a +/1/a +match 2: + b +/2/b +match 3: + c +/3/c +match 4: + d +/4/d +match 5: + e +/5/e +RC=0 +---------------------------- Test 111 ----------------------------- +607:0,12 +609:0,12 +611:0,12 +613:0,12 +615:0,12 +RC=0 +---------------------------- Test 112 ----------------------------- +37168,12 +37180,12 +37192,12 +37204,12 +37216,12 +RC=0