From 7db87842963394d250b8405cdb513a502b479005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= Date: Tue, 9 Nov 2021 08:57:48 -0800 Subject: [PATCH] pcre2grep: correctly handle multiple passes (#35) * tests: use an explicit filehandle to share in testing -m The way stdin is shared to all participants of a subshell varies per shell, and at least the standard /bin/sh in Solaris seems to create a new copy for each command, defeating the purpose of the test. Instead, use exec to create a filehandle that can then be used explicitly in the test to confirm that the stream is set. * pcre2grep: correctly handle multiple passes When the -m option is used, pcre2grep is meant to exit after enough matches are found while leaving the stream pinned to the next position after the last match. Unfortunately, it wasn't correctly tracking the beginning of the stream on subsequent passes, and therefore it would fail to use the right seek value. Grab the position of the stream at the beginning and, while at it, make sure that the stream passed hasn't been consumed already. --- RunGrepTest | 6 +++++- src/pcre2grep.c | 15 ++++++++++++--- testdata/grepoutput | 11 +++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/RunGrepTest b/RunGrepTest index 25f69bd..9682c5f 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -674,10 +674,14 @@ echo "---------------------------- Test 131 -----------------------------" >>tes echo "RC=$?" >>testtrygrep echo "---------------------------- Test 132 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1 +(cd $srcdir; exec 3<$srcdir/testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; head -1 <&3; exec 3<&-) >>testtrygrep 2>&1 echo "RC=$?" 
>>testtrygrep echo "---------------------------- Test 133 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3<$srcdir/testdata/grepinput; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; echo '---'; $valgrind $vjs $pcre2grep -m1 -A3 '^match' <&3; exec 3<&-) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 134 -----------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep diff --git a/src/pcre2grep.c b/src/pcre2grep.c index aa84ea7..519346b 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -2538,6 +2538,7 @@ BOOL endhyphenpending = FALSE; BOOL lines_printed = FALSE; BOOL input_line_buffered = line_buffered; FILE *in = NULL; /* Ensure initialized */ +long stream_start = -1; /* Only non-negative if relevant */ /* Do the first read into the start of the buffer and set up the pointer to end of what we have. In the case of libz, a non-zipped .gz file will be read as a @@ -2547,7 +2548,15 @@ fail. */ if (frtype != FR_LIBZ && frtype != FR_LIBBZ2) { in = (FILE *)handle; - if (is_file_tty(in)) input_line_buffered = TRUE; + if (feof(in)) + return 1; + if (is_file_tty(in)) + input_line_buffered = TRUE; + else + { + if (count_limit >= 0 && filename == stdin_name) + stream_start = ftell(in); + } } else input_line_buffered = FALSE; @@ -2594,8 +2603,8 @@ while (ptr < endptr) if (count_limit >= 0 && count_matched_lines >= count_limit) { - if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle)) - (void)fseek(handle, (long int)filepos, SEEK_SET); + if (stream_start >= 0) + (void)fseek(handle, stream_start + (long int)filepos, SEEK_SET); rc = (count_limit == 0)? 
1 : 0; break; } diff --git a/testdata/grepoutput b/testdata/grepoutput index 5f3b97c..66af4cf 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -978,5 +978,16 @@ match 2: a RC=0 ---------------------------- Test 133 ----------------------------- +match 1: + a +match 2: + b +--- +match 2: + b +match 3: + c +RC=0 +---------------------------- Test 134 ----------------------------- =AB3CD5= RC=0