Fix \K issues in pcre2grep.

This commit is contained in:
Philip.Hazel 2017-12-26 15:10:04 +00:00
parent 07de1b1a9f
commit 911d43cd1e
4 changed files with 69 additions and 5 deletions

View File

@ -100,6 +100,11 @@ code for -g that handles the case when \K in an assertion causes the match to
end at the original start point. Also arranged for it to detect when \K causes
the end of a match to be before its start.
24. Similar to 23 above, strange things (including loops) could happen in
pcre2grep when \K was used in an assertion and either --colour was in use or
multiline mode was selected. The "end at original start point" bug is fixed,
and if the end point is found to be before the start point, the two are swapped.
Version 10.30 14-August-2017
----------------------------

View File

@ -630,6 +630,17 @@ echo "RC=$?" >>testtrygrep
(cd $srcdir; $valgrind $vjs $pcre2grep -Mn -A2 'start[\s]+end' testdata/grepinputM) >>testtrygrep
echo "RC=$?" >>testtrygrep
echo "---------------------------- Test 125 -----------------------------" >>testtrygrep
printf "abcd\n" >testNinputgrep
$valgrind $vjs $pcre2grep --colour=always '(?<=\K.)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=.\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?<=\K[ac])' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
$valgrind $vjs $pcre2grep --colour=always '(?=[ac]\K)' testNinputgrep >>testtrygrep
echo "RC=$?" >>testtrygrep
# Now compare the results.

View File

@ -2764,11 +2764,38 @@ while (ptr < endptr)
if ((multiline || do_colour) && !invert)
{
int plength;
PCRE2_SIZE endprevious;
/* The use of \K may make the end offset earlier than the start. In
this situation, swap them round. */
if (offsets[0] > offsets[1])
{
PCRE2_SIZE temp = offsets[0];
offsets[0] = offsets[1];
offsets[1] = temp;
}
FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
for (;;)
{
startoffset = offsets[1]; /* Advance after previous match. */
PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
endprevious = offsets[1];
startoffset = endprevious; /* Advance after previous match. */
/* If the pattern contained a lookbehind that included \K, the end of the
match may be at or before the actual starting offset we have just used.
In this case, start one character further on. */
if (startoffset <= oldstartoffset)
{
startoffset = oldstartoffset + 1;
if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
}
/* If the current match ended past the end of the line (only possible
in multiline mode), we must move on to the line in which it did end
@ -2782,6 +2809,7 @@ while (ptr < endptr)
filepos += (int)(linelength + endlinelength);
linenumber++;
startoffset -= (int)(linelength + endlinelength);
endprevious -= (int)(linelength + endlinelength);
t = end_of_line(ptr, endptr, &endlinelength);
linelength = t - ptr - endlinelength;
length = (size_t)(endptr - ptr);
@ -2797,7 +2825,18 @@ while (ptr < endptr)
loop for any that may follow. */
if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
FWRITE_IGNORE(ptr + startoffset, 1, offsets[0] - startoffset, stdout);
/* The use of \K may make the end offset earlier than the start. In
this situation, swap them round. */
if (offsets[0] > offsets[1])
{
PCRE2_SIZE temp = offsets[0];
offsets[0] = offsets[1];
offsets[1] = temp;
}
FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
print_match(ptr + offsets[0], offsets[1] - offsets[0]);
}
@ -2805,8 +2844,8 @@ while (ptr < endptr)
and its line-ending characters (if they matched the pattern), so there
may be no more to print. */
plength = (int)((linelength + endlinelength) - startoffset);
if (plength > 0) FWRITE_IGNORE(ptr + startoffset, 1, plength, stdout);
plength = (int)((linelength + endlinelength) - endprevious);
if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
}
/* Not colouring or multiline; no need to search for further matches. */

9
testdata/grepoutput vendored
View File

@ -936,3 +936,12 @@ end
16:start end in between start
end
RC=0
---------------------------- Test 125 -----------------------------
abcd
RC=0
abcd
RC=0
abcd
RC=0
abcd
RC=0