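Fix bug where a possessive capturing group with a zero minimum repeat, such
as (a)*+, failed to allow the zero-repeat case when the ovector passed to
pcre2_match() was too small to capture the group.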
This commit is contained in:
parent
154bc83cb5
commit
3d9cc76a52
|
@ -76,6 +76,10 @@ locales that can be used.
|
||||||
capturing group number without parentheses, the last character was incorrectly
|
capturing group number without parentheses, the last character was incorrectly
|
||||||
literally included at the end of the replacement string.
|
literally included at the end of the replacement string.
|
||||||
|
|
||||||
|
15. A possessive capturing group such as (a)*+ with a minimum repeat of zero
|
||||||
|
failed to allow the zero-repeat case if pcre2_match() was called with an
|
||||||
|
ovector too small to capture the group.
|
||||||
|
|
||||||
|
|
||||||
Version 10.00 05-January-2015
|
Version 10.00 05-January-2015
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
|
@ -1149,7 +1149,8 @@ for (;;)
|
||||||
different. The end of these brackets will always be OP_KETRPOS, which
|
different. The end of these brackets will always be OP_KETRPOS, which
|
||||||
returns MATCH_KETRPOS without going further in the pattern. By this means
|
returns MATCH_KETRPOS without going further in the pattern. By this means
|
||||||
we can handle the group by iteration rather than recursion, thereby
|
we can handle the group by iteration rather than recursion, thereby
|
||||||
reducing the amount of stack needed. */
|
reducing the amount of stack needed. If the ovector is too small for
|
||||||
|
capturing, treat as non-capturing. */
|
||||||
|
|
||||||
case OP_CBRAPOS:
|
case OP_CBRAPOS:
|
||||||
case OP_SCBRAPOS:
|
case OP_SCBRAPOS:
|
||||||
|
@ -1158,86 +1159,77 @@ for (;;)
|
||||||
POSSESSIVE_CAPTURE:
|
POSSESSIVE_CAPTURE:
|
||||||
number = GET2(ecode, 1+LINK_SIZE);
|
number = GET2(ecode, 1+LINK_SIZE);
|
||||||
offset = number << 1;
|
offset = number << 1;
|
||||||
|
if (offset >= mb->offset_max) goto POSSESSIVE_NON_CAPTURE;
|
||||||
|
|
||||||
if (offset < mb->offset_max)
|
matched_once = FALSE;
|
||||||
|
code_offset = (int)(ecode - mb->start_code);
|
||||||
|
|
||||||
|
save_offset1 = mb->ovector[offset];
|
||||||
|
save_offset2 = mb->ovector[offset+1];
|
||||||
|
save_offset3 = mb->ovector[mb->offset_end - number];
|
||||||
|
save_capture_last = mb->capture_last;
|
||||||
|
|
||||||
|
/* Each time round the loop, save the current subject position for use
|
||||||
|
when the group matches. For MATCH_MATCH, the group has matched, so we
|
||||||
|
restart it with a new subject starting position, remembering that we had
|
||||||
|
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
||||||
|
usual. If we haven't matched any alternatives in any iteration, check to
|
||||||
|
see if a previous iteration matched. If so, the group has matched;
|
||||||
|
continue from afterwards. Otherwise it has failed; restore the previous
|
||||||
|
capture values before returning NOMATCH. */
|
||||||
|
|
||||||
|
for (;;)
|
||||||
{
|
{
|
||||||
matched_once = FALSE;
|
mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
|
||||||
code_offset = (int)(ecode - mb->start_code);
|
if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
|
||||||
|
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
|
||||||
save_offset1 = mb->ovector[offset];
|
eptrb, RM63);
|
||||||
save_offset2 = mb->ovector[offset+1];
|
if (rrc == MATCH_KETRPOS)
|
||||||
save_offset3 = mb->ovector[mb->offset_end - number];
|
|
||||||
save_capture_last = mb->capture_last;
|
|
||||||
|
|
||||||
/* Each time round the loop, save the current subject position for use
|
|
||||||
when the group matches. For MATCH_MATCH, the group has matched, so we
|
|
||||||
restart it with a new subject starting position, remembering that we had
|
|
||||||
at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
|
|
||||||
usual. If we haven't matched any alternatives in any iteration, check to
|
|
||||||
see if a previous iteration matched. If so, the group has matched;
|
|
||||||
continue from afterwards. Otherwise it has failed; restore the previous
|
|
||||||
capture values before returning NOMATCH. */
|
|
||||||
|
|
||||||
for (;;)
|
|
||||||
{
|
{
|
||||||
mb->ovector[mb->offset_end - number] = eptr - mb->start_subject;
|
offset_top = mb->end_offset_top;
|
||||||
if (op >= OP_SBRA) mb->match_function_type |= MATCH_CBEGROUP;
|
ecode = mb->start_code + code_offset;
|
||||||
RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, mb,
|
save_capture_last = mb->capture_last;
|
||||||
eptrb, RM63);
|
matched_once = TRUE;
|
||||||
if (rrc == MATCH_KETRPOS)
|
mstart = mb->start_match_ptr; /* In case \K changed it */
|
||||||
|
if (eptr == mb->end_match_ptr) /* Matched an empty string */
|
||||||
{
|
{
|
||||||
offset_top = mb->end_offset_top;
|
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
||||||
ecode = mb->start_code + code_offset;
|
break;
|
||||||
save_capture_last = mb->capture_last;
|
|
||||||
matched_once = TRUE;
|
|
||||||
mstart = mb->start_match_ptr; /* In case \K changed it */
|
|
||||||
if (eptr == mb->end_match_ptr) /* Matched an empty string */
|
|
||||||
{
|
|
||||||
do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
eptr = mb->end_match_ptr;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
eptr = mb->end_match_ptr;
|
||||||
/* See comment in the code for capturing groups above about handling
|
continue;
|
||||||
THEN. */
|
|
||||||
|
|
||||||
if (rrc == MATCH_THEN)
|
|
||||||
{
|
|
||||||
next_ecode = ecode + GET(ecode,1);
|
|
||||||
if (mb->start_match_ptr < next_ecode &&
|
|
||||||
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
|
||||||
rrc = MATCH_NOMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
|
||||||
mb->capture_last = save_capture_last;
|
|
||||||
ecode += GET(ecode, 1);
|
|
||||||
if (*ecode != OP_ALT) break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!matched_once)
|
/* See comment in the code for capturing groups above about handling
|
||||||
|
THEN. */
|
||||||
|
|
||||||
|
if (rrc == MATCH_THEN)
|
||||||
{
|
{
|
||||||
mb->ovector[offset] = save_offset1;
|
next_ecode = ecode + GET(ecode,1);
|
||||||
mb->ovector[offset+1] = save_offset2;
|
if (mb->start_match_ptr < next_ecode &&
|
||||||
mb->ovector[mb->offset_end - number] = save_offset3;
|
(*ecode == OP_ALT || *next_ecode == OP_ALT))
|
||||||
|
rrc = MATCH_NOMATCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allow_zero || matched_once)
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
{
|
mb->capture_last = save_capture_last;
|
||||||
ecode += 1 + LINK_SIZE;
|
ecode += GET(ecode, 1);
|
||||||
break;
|
if (*ecode != OP_ALT) break;
|
||||||
}
|
|
||||||
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat
|
if (!matched_once)
|
||||||
as a non-capturing bracket. */
|
{
|
||||||
|
mb->ovector[offset] = save_offset1;
|
||||||
|
mb->ovector[offset+1] = save_offset2;
|
||||||
|
mb->ovector[mb->offset_end - number] = save_offset3;
|
||||||
|
}
|
||||||
|
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
if (allow_zero || matched_once)
|
||||||
/* VVVVVVVVVVVVVVVVVVVVVVVVV */
|
{
|
||||||
|
ecode += 1 + LINK_SIZE;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
RRETURN(MATCH_NOMATCH);
|
||||||
|
|
||||||
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
/* Non-capturing possessive bracket with unlimited repeat. We come here
|
||||||
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
from BRAZERO with allow_zero = TRUE. The code is similar to the above,
|
||||||
|
|
|
@ -4164,4 +4164,12 @@ a random value. /Ix
|
||||||
** Failers
|
** Failers
|
||||||
356
|
356
|
||||||
|
|
||||||
|
'^(a)*+(\w)'
|
||||||
|
g
|
||||||
|
g\=ovector=1
|
||||||
|
|
||||||
|
'^(?:a)*+(\w)'
|
||||||
|
g
|
||||||
|
g\=ovector=1
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -13933,4 +13933,21 @@ No match
|
||||||
356
|
356
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
'^(a)*+(\w)'
|
||||||
|
g
|
||||||
|
0: g
|
||||||
|
1: <unset>
|
||||||
|
2: g
|
||||||
|
g\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: g
|
||||||
|
|
||||||
|
'^(?:a)*+(\w)'
|
||||||
|
g
|
||||||
|
0: g
|
||||||
|
1: g
|
||||||
|
g\=ovector=1
|
||||||
|
Matched, but too many substrings
|
||||||
|
0: g
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue