From cda8384a95d958343174b9139f70bc37564f9f1b Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 3 Apr 2015 11:14:19 +0000 Subject: [PATCH] Fix pcre2grep loop provoked by \K in a lookbehind assertion in a non-anchored pattern. --- ChangeLog | 5 ++++- RunGrepTest | 9 +++++++++ src/pcre2grep.c | 22 ++++++++++++++++++---- testdata/grepinput8 | 2 ++ testdata/grepoutput | 8 ++++++++ testdata/grepoutput8 | 8 ++++++++ 6 files changed, 49 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index e455569..1bbea12 100644 --- a/ChangeLog +++ b/ChangeLog @@ -27,7 +27,7 @@ error about an unsupported item. 8. For some types of pattern, for example /Z*(|d*){216}/, the auto- possessification code could take exponential time to complete. A recursion -depth limit of 10000 has been imposed to limit the resources used by this +depth limit of 1000 has been imposed to limit the resources used by this optimization. This infelicity was discovered by the LLVM fuzzer. 9. A pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class @@ -67,6 +67,9 @@ have either been changed to uint32_t or commented as "must be signed". caused a stack overflow instead of the diagnosis of a non-fixed length lookbehind assertion. This bug was discovered by the LLVM fuzzer. +17. The use of \K in a positive lookbehind assertion in a non-anchored pattern +(e.g. /(?<=\Ka)/) could make pcre2grep loop. + Version 10.10 06-March-2015 --------------------------- diff --git a/RunGrepTest b/RunGrepTest index 6d26d87..26c9099 100755 --- a/RunGrepTest +++ b/RunGrepTest @@ -524,6 +524,11 @@ echo "---------------------------- Test 106 -----------------------------" >>tes (cd $srcdir; echo "a" | $valgrind $pcre2grep -M "|a" ) >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep +echo "---------------------------- Test 107 -----------------------------" >>testtrygrep +echo "a" >testtemp1grep +echo "aaaaa" >>testtemp1grep +(cd $srcdir; $valgrind $pcre2grep --line-offsets '(?<=\Ka)' testtemp1grep) >>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep # Now compare the results. @@ -544,6 +549,10 @@ if [ $utf8 -ne 0 ] ; then (cd $srcdir; $valgrind $pcre2grep -n -u -C 3 --newline=any "Match" ./testdata/grepinput8) >>testtrygrep echo "RC=$?" >>testtrygrep + echo "---------------------------- Test U3 ------------------------------" >>testtrygrep + (cd $srcdir; $valgrind $pcre2grep --line-offsets -u '(?<=\K\x{17f})' ./testdata/grepinput8) >>testtrygrep + echo "RC=$?" >>testtrygrep + $cf $srcdir/testdata/grepoutput8 testtrygrep if [ $? != 0 ] ; then exit 1; fi diff --git a/src/pcre2grep.c b/src/pcre2grep.c index a1a60bd..9aa3fcb 100644 --- a/src/pcre2grep.c +++ b/src/pcre2grep.c @@ -1433,7 +1433,7 @@ Returns: TRUE if there was a match static BOOL match_patterns(char *matchptr, size_t length, unsigned int options, - int startoffset, int *mrc) + size_t startoffset, int *mrc) { int i; size_t slen = length; @@ -1581,12 +1581,12 @@ while (ptr < endptr) { int endlinelength; int mrc = 0; - int startoffset = 0; unsigned int options = 0; BOOL match; char *matchptr = ptr; char *t = ptr; size_t length, linelength; + size_t startoffset = 0; /* At this point, ptr is at the start of a line. We need to find the length of the subject string to pass to pcre_exec(). In multiline mode, it is the @@ -1729,6 +1729,8 @@ while (ptr < endptr) { if (!invert) { + size_t oldstartoffset; + if (printname != NULL) fprintf(stdout, "%s:", printname); if (number) fprintf(stdout, "%d:", linenumber); @@ -1772,12 +1774,23 @@ while (ptr < endptr) if (printed || printname != NULL || number) fprintf(stdout, "\n"); } - /* Prepare to repeat to find the next match */ + /* Prepare to repeat to find the next match. If the pattern contained a + lookbehind that included \K, it is possible that the end of the match + might be at or before the actual starting offset we have just used. In + this case, start one character further on. */ match = FALSE; if (line_buffered) fflush(stdout); rc = 0; /* Had some success */ startoffset = offsets[1]; /* Restart after the match */ + oldstartoffset = pcre2_get_startchar(match_data); + if (startoffset <= oldstartoffset) + { + if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */ + startoffset = oldstartoffset + 1; + if (utf) + while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++; + } goto ONLY_MATCHING_RESTART; } } @@ -1917,7 +1930,7 @@ while (ptr < endptr) for (;;) { startoffset = offsets[1]; - if (startoffset >= (int)linelength + endlinelength || + if (startoffset >= linelength + endlinelength || !match_patterns(matchptr, length, options, startoffset, &mrc)) break; FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout); @@ -1973,6 +1986,7 @@ while (ptr < endptr) /* Advance to after the newline and increment the line number. The file offset to the current line is maintained in filepos. */ + END_ONE_MATCH: ptr += linelength + endlinelength; filepos += (int)(linelength + endlinelength); linenumber++; diff --git a/testdata/grepinput8 b/testdata/grepinput8 index c4b8c44..7779cdc 100644 --- a/testdata/grepinput8 +++ b/testdata/grepinput8 @@ -9,3 +9,5 @@ After 111 After 222
After 333 And so on and so on And so on and so on +ſ +ſſſſſ diff --git a/testdata/grepoutput b/testdata/grepoutput index dd92b3e..883bad7 100644 --- a/testdata/grepoutput +++ b/testdata/grepoutput @@ -743,3 +743,11 @@ RC=0 ---------------------------- Test 106 ----------------------------- a RC=0 +---------------------------- Test 107 ----------------------------- +1:0,1 +2:0,1 +2:1,1 +2:2,1 +2:3,1 +2:4,1 +RC=0 diff --git a/testdata/grepoutput8 b/testdata/grepoutput8 index 91493bd..ecdbd70 100644 --- a/testdata/grepoutput8 +++ b/testdata/grepoutput8 @@ -10,3 +10,11 @@ RC=0 16-After 111 17-After 222
18-After 333 RC=0 +---------------------------- Test U3 ------------------------------ +12:0,2 +13:0,2 +13:2,2 +13:4,2 +13:6,2 +13:8,2 +RC=0