Fix bug for (*ACCEPT) inside a capturing group.

This commit is contained in:
Philip.Hazel 2014-11-05 16:05:19 +00:00
parent 44ef2c3401
commit e3e4131379
5 changed files with 55 additions and 7 deletions

View File

@ -44,4 +44,11 @@ strings matched by the repetition are not all the same length.
information. This applied to any pattern with a group that matched no information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/. characters, for example: /(?:(?=.)|(?<!x))a/.
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
those parentheses to be closed with whatever has been captured so far. However,
it was failing to mark any other groups between the highest capture so far and
the current group as "unset". Thus, the ovector for those groups contained
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
matched against "abcd".
**** ****

View File

@ -1465,7 +1465,18 @@ for (;;)
mb->ovector[offset] = mb->ovector[offset] =
mb->ovector[mb->offset_end - number]; mb->ovector[mb->offset_end - number];
mb->ovector[offset+1] = eptr - mb->start_subject; mb->ovector[offset+1] = eptr - mb->start_subject;
if (offset_top <= offset) offset_top = offset + 2;
/* If this group is at or above the current highwater mark, ensure that
any groups between the current high water mark and this group are marked
unset and then update the high water mark. */
if (offset >= offset_top)
{
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
register PCRE2_SIZE *iend = mb->ovector + offset;
while (iptr < iend) *iptr++ = PCRE2_UNSET;
offset_top = offset + 2;
}
} }
ecode += 1 + IMM2_SIZE; ecode += 1 + IMM2_SIZE;
break; break;
@ -6321,18 +6332,18 @@ while (nextframe != NULL)
* Match a Regular Expression * * Match a Regular Expression *
*************************************************/ *************************************************/
/* This function applies a compiled re to a subject string and picks out /* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring. each substring: the offsets to the start and end of the substring.
Arguments: Arguments:
context points a PCRE2 context
code points to the compiled expression code points to the compiled expression
subject points to the subject string subject points to the subject string
length length of subject string (may contain binary zeros) length length of subject string (may contain binary zeros)
start_offset where to start in the subject string start_offset where to start in the subject string
options option bits options option bits
match_data points to a match_data block match_data points to a match_data block
mcontext points to a PCRE2 context
Returns: > 0 => success; value is the number of ovector pairs filled Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough = 0 => success, but ovector is not big enough

View File

@ -163,6 +163,7 @@ void vms_setsymbol( char *, char *, int );
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */ #define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */ #define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */ #define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */ #define LOOPREPEAT 500000 /* Default loop count for timing */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */ #define VERSION_SIZE 64 /* Size of buffer for the version strings */
@ -4685,12 +4686,18 @@ else
for (gmatched = 0;; gmatched++) for (gmatched = 0;; gmatched++)
{ {
PCRE2_SIZE j;
int capcount; int capcount;
PCRE2_SIZE *ovector; PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[2]; PCRE2_SIZE ovecsave[2];
ovector = FLD(match_data, ovector); ovector = FLD(match_data, ovector);
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the /* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */ stack callback function. */
@ -4786,7 +4793,7 @@ for (gmatched = 0;; gmatched++)
{ {
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */ PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
} }
/* Run a single DFA or NFA match. */ /* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0) if ((dat_datctl.control & CTL_DFA) != 0)
@ -4887,14 +4894,27 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "Start of matched string is beyond its end - " fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n"); "displaying from end to start.\n");
} }
fprintf(outfile, "%2d: ", i/2); fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */
if (start == PCRE2_UNSET) if (start == PCRE2_UNSET)
{ {
fprintf(outfile, "<unset>\n"); fprintf(outfile, "<unset>\n");
continue; continue;
} }
/* Check for silly offsets, in particular, values that have not been
set when they should have been. */
if (start > ulen || end > ulen)
{
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
start, end);
continue;
}
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with /* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
@ -4918,7 +4938,6 @@ for (gmatched = 0;; gmatched++)
if (showallused) if (showallused)
{ {
PCRE2_SIZE j;
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
PCHARS(lmiddle, pp, start, end - start, utf, outfile); PCHARS(lmiddle, pp, start, end - start, utf, outfile);
PCHARS(lright, pp, end, rightchar - end, utf, outfile); PCHARS(lright, pp, end, rightchar - end, utf, outfile);
@ -4944,7 +4963,6 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, " (JIT)"); fprintf(outfile, " (JIT)");
if (startchar != start) if (startchar != start)
{ {
PCRE2_SIZE j;
fprintf(outfile, "\n "); fprintf(outfile, "\n ");
for (j = 0; j < lleft; j++) fprintf(outfile, "^"); for (j = 0; j < lleft; j++) fprintf(outfile, "^");
} }

3
testdata/testinput1 vendored
View File

@ -5702,4 +5702,7 @@ name)/mark
abd abd
xyd xyd
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
1234abcd
# End of testinput1 # End of testinput1

View File

@ -9403,4 +9403,13 @@ No match
xyd xyd
0: d 0: d
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
1234abcd
0:
1: <unset>
2: <unset>
3: <unset>
4: <unset>
5:
# End of testinput1 # End of testinput1