Fix bug for groups like (a)*+ (possessive, zero minimum) when the ovector was too small to capture.
This commit is contained in:
Philip.Hazel 2015-02-11 10:06:09 +00:00
parent 154bc83cb5
commit 3d9cc76a52
4 changed files with 89 additions and 68 deletions

View File

@ -76,6 +76,10 @@ locales that can be used.
capturing group number without parentheses, the last character was incorrectly
literally included at the end of the replacement string.
15. A possessive capturing group such as (a)*+ with a minimum repeat of zero
failed to allow the zero-repeat case if pcre2_match() was called with an
ovector too small to capture the group.
Version 10.00 05-January-2015
-----------------------------

View File

@ -1149,7 +1149,8 @@ for (;;)
different. The end of these brackets will always be OP_KETRPOS, which different. The end of these brackets will always be OP_KETRPOS, which
returns MATCH_KETRPOS without going further in the pattern. By this means returns MATCH_KETRPOS without going further in the pattern. By this means
we can handle the group by iteration rather than recursion, thereby we can handle the group by iteration rather than recursion, thereby
reducing the amount of stack needed. */ reducing the amount of stack needed. If the ovector is too small for
capturing, treat as non-capturing. */
case OP_CBRAPOS: case OP_CBRAPOS:
case OP_SCBRAPOS: case OP_SCBRAPOS:
@ -1158,86 +1159,77 @@ for (;;)
POSSESSIVE_CAPTURE: POSSESSIVE_CAPTURE:
number = GET2(ecode, 1+LINK_SIZE); number = GET2(ecode, 1+LINK_SIZE);
offset = number << 1; offset = number << 1;
if (offset >= mb->offset_max) goto POSSESSIVE_NON_CAPTURE;
if (offset < mb->offset_max) matched_once = FALSE;
code_offset = (int)(ecode - mb->start_code);
save_offset1 = mb->ovector[offset];
save_offset2 = mb->ovector[offset+1];
save_offset3 = mb->ovector[mb->offset_end - number];
save_capture_last = mb->capture_last;
/* Each time round the loop, save the current subject position for use
when the group matches. For MATCH_MATCH, the group has matched, so we
restart it with a new subject starting position, remembering that we had
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
usual. If we haven't matched any alternatives in any iteration, check to
see if a previous iteration matched. If so, the group has matched;
continue from afterwards. Otherwise it has failed; restore the previous
capture values before returning NOMATCH. */
for (;;)
{ {
matched_once = FALSE; mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
code_offset = (int)(ecode - mb->start_code); if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
save_offset1 = mb->ovector[offset]; eptrb, RM63);
save_offset2 = mb->ovector[offset+1]; if (rrc == MATCH_KETRPOS)
save_offset3 = mb->ovector[mb->offset_end - number];
save_capture_last = mb->capture_last;
/* Each time round the loop, save the current subject position for use
when the group matches. For MATCH_MATCH, the group has matched, so we
restart it with a new subject starting position, remembering that we had
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
usual. If we haven't matched any alternatives in any iteration, check to
see if a previous iteration matched. If so, the group has matched;
continue from afterwards. Otherwise it has failed; restore the previous
capture values before returning NOMATCH. */
for (;;)
{ {
mb->ovector[mb->offset_end - number] = eptr - mb->start_subject; offset_top = mb->end_offset_top;
if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP; ecode = mb->start_code + code_offset;
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb, save_capture_last = mb->capture_last;
eptrb, RM63); matched_once = TRUE;
if (rrc == MATCH_KETRPOS) mstart = mb->start_match_ptr; /* In case \K changed it */
if (eptr == mb->end_match_ptr) /* Matched an empty string */
{ {
offset_top = mb->end_offset_top; do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
ecode = mb->start_code + code_offset; break;
save_capture_last = mb->capture_last;
matched_once = TRUE;
mstart = mb->start_match_ptr; /* In case \K changed it */
if (eptr == mb->end_match_ptr) /* Matched an empty string */
{
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
break;
}
eptr = mb->end_match_ptr;
continue;
} }
eptr = mb->end_match_ptr;
/* See comment in the code for capturing groups above about handling continue;
THEN. */
if (rrc == MATCH_THEN)
{
next_ecode = ecode + GET(ecode,1);
if (mb->start_match_ptr < next_ecode &&
(*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH;
}
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
mb->capture_last = save_capture_last;
ecode += GET(ecode, 1);
if (*ecode != OP_ALT) break;
} }
if (!matched_once) /* See comment in the code for capturing groups above about handling
THEN. */
if (rrc == MATCH_THEN)
{ {
mb->ovector[offset] = save_offset1; next_ecode = ecode + GET(ecode,1);
mb->ovector[offset+1] = save_offset2; if (mb->start_match_ptr < next_ecode &&
mb->ovector[mb->offset_end - number] = save_offset3; (*ecode == OP_ALT || *next_ecode == OP_ALT))
rrc = MATCH_NOMATCH;
} }
if (allow_zero || matched_once) if (rrc != MATCH_NOMATCH) RRETURN(rrc);
{ mb->capture_last = save_capture_last;
ecode += 1 + LINK_SIZE; ecode += GET(ecode, 1);
break; if (*ecode != OP_ALT) break;
}
RRETURN(MATCH_NOMATCH);
} }
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat if (!matched_once)
as a non-capturing bracket. */ {
mb->ovector[offset] = save_offset1;
mb->ovector[offset+1] = save_offset2;
mb->ovector[mb->offset_end - number] = save_offset3;
}
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ if (allow_zero || matched_once)
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ {
ecode += 1 + LINK_SIZE;
break;
}
RRETURN(MATCH_NOMATCH);
/* Non-capturing possessive bracket with unlimited repeat. We come here /* Non-capturing possessive bracket with unlimited repeat. We come here
from BRAZERO with allow_zero = TRUE. The code is similar to the above, from BRAZERO with allow_zero = TRUE. The code is similar to the above,

8
testdata/testinput2 vendored
View File

@ -4164,4 +4164,12 @@ a random value. /Ix
** Failers ** Failers
356 356
'^(a)*+(\w)'
g
g\=ovector=1
'^(?:a)*+(\w)'
g
g\=ovector=1
# End of testinput2 # End of testinput2

17
testdata/testoutput2 vendored
View File

@ -13933,4 +13933,21 @@ No match
356 356
No match No match
'^(a)*+(\w)'
g
0: g
1: <unset>
2: g
g\=ovector=1
Matched, but too many substrings
0: g
'^(?:a)*+(\w)'
g
0: g
1: g
g\=ovector=1
Matched, but too many substrings
0: g
# End of testinput2 # End of testinput2