From e3e4131379eb19b261a04edd60b05bff17fa3e81 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Wed, 5 Nov 2014 16:05:19 +0000 Subject: [PATCH] Fix bug for (*ACCEPT) inside a capturing group. --- ChangeLog | 7 +++++++ src/pcre2_match.c | 17 ++++++++++++++--- src/pcre2test.c | 26 ++++++++++++++++++++++---- testdata/testinput1 | 3 +++ testdata/testoutput1 | 9 +++++++++ 5 files changed, 55 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index cdca10a..eecc01c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -44,4 +44,11 @@ strings matched by the repetition are not all the same length. information. This applied to any pattern with a group that matched no characters, for example: /(?:(?=.)|(?ovector[offset] = mb->ovector[mb->offset_end - number]; mb->ovector[offset+1] = eptr - mb->start_subject; - if (offset_top <= offset) offset_top = offset + 2; + + /* If this group is at or above the current highwater mark, ensure that + any groups between the current high water mark and this group are marked + unset and then update the high water mark. */ + + if (offset >= offset_top) + { + register PCRE2_SIZE *iptr = mb->ovector + offset_top; + register PCRE2_SIZE *iend = mb->ovector + offset; + while (iptr < iend) *iptr++ = PCRE2_UNSET; + offset_top = offset + 2; + } } ecode += 1 + IMM2_SIZE; break; @@ -6321,18 +6332,18 @@ while (nextframe != NULL) * Match a Regular Expression * *************************************************/ -/* This function applies a compiled re to a subject string and picks out +/* This function applies a compiled pattern to a subject string and picks out portions of the string if it matches. Two elements in the vector are set for each substring: the offsets to the start and end of the substring. 
Arguments: - context points a PCRE2 context code points to the compiled expression subject points to the subject string length length of subject string (may contain binary zeros) start_offset where to start in the subject string options option bits match_data points to a match_data block + mcontext points to a PCRE2 context Returns: > 0 => success; value is the number of ovector pairs filled = 0 => success, but ovector is not big enough diff --git a/src/pcre2test.c b/src/pcre2test.c index 66b2e19..7146733 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -163,6 +163,7 @@ void vms_setsymbol( char *, char *, int ); #define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */ #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ #define DEFAULT_OVECCOUNT 15 /* Default ovector count */ +#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */ #define LOOPREPEAT 500000 /* Default loop count for timing */ #define VERSION_SIZE 64 /* Size of buffer for the version strings */ @@ -4685,12 +4686,18 @@ else for (gmatched = 0;; gmatched++) { + PCRE2_SIZE j; int capcount; PCRE2_SIZE *ovector; PCRE2_SIZE ovecsave[2]; ovector = FLD(match_data, ovector); + /* Fill the ovector with junk to detect elements that do not get set + when they should be. */ + + for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET; + /* When matching is via pcre2_match(), we will detect the use of JIT via the stack callback function. */ @@ -4786,7 +4793,7 @@ for (gmatched = 0;; gmatched++) { PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ } - + /* Run a single DFA or NFA match. 
*/ if ((dat_datctl.control & CTL_DFA) != 0) @@ -4887,14 +4894,27 @@ for (gmatched = 0;; gmatched++) fprintf(outfile, "Start of matched string is beyond its end - " "displaying from end to start.\n"); } - + fprintf(outfile, "%2d: ", i/2); + + /* Check for an unset group */ + if (start == PCRE2_UNSET) { fprintf(outfile, "\n"); continue; } + /* Check for silly offsets, in particular, values that have not been + set when they should have been. */ + + if (start > ulen || end > ulen) + { + fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", + start, end); + continue; + } + /* When JIT is not being used, ALLUSEDTEXT may be set. (It if is set with JIT, it is disabled above, with a comment.) When the match is done by the interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is @@ -4918,7 +4938,6 @@ for (gmatched = 0;; gmatched++) if (showallused) { - PCRE2_SIZE j; PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); PCHARS(lmiddle, pp, start, end - start, utf, outfile); PCHARS(lright, pp, end, rightchar - end, utf, outfile); @@ -4944,7 +4963,6 @@ for (gmatched = 0;; gmatched++) fprintf(outfile, " (JIT)"); if (startchar != start) { - PCRE2_SIZE j; fprintf(outfile, "\n "); for (j = 0; j < lleft; j++) fprintf(outfile, "^"); } diff --git a/testdata/testinput1 b/testdata/testinput1 index 2aea25d..fa6c203 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -5702,4 +5702,7 @@ name)/mark abd xyd +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 8240536..6bd2e9e 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9403,4 +9403,13 @@ No match xyd 0: d +/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/ + 1234abcd + 0: + 1: + 2: + 3: + 4: + 5: + # End of testinput1