Fix auto-callout bug when a (?# comment comes between an item and its quantifier.
This commit is contained in:
parent
5075e74d8a
commit
2eb24e2dac
|
@ -330,6 +330,10 @@ and could fail to match.
|
|||
98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
|
||||
because it sets the "startoffset" parameter for pcre2_match().
|
||||
|
||||
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
|
||||
an item and its quantifier (for example, A(?#comment)?B), pcre2_compile()
|
||||
misbehaved.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -4001,32 +4001,48 @@ for (;; ptr++)
|
|||
}
|
||||
}
|
||||
|
||||
/* Skip over (?# comments. We need to do this here because we want to know if
|
||||
the next thing is a quantifier, and these comments may come between an item
|
||||
and its quantifier. */
|
||||
|
||||
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
|
||||
ptr[2] == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
ptr += 3;
|
||||
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR18;
|
||||
goto FAILED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* See if the next thing is a quantifier. */
|
||||
|
||||
is_quantifier =
|
||||
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
|
||||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
|
||||
|
||||
/* Fill in length of a previous callout, except when the next thing is a
|
||||
quantifier or when processing a property substitution string in UCP mode. */
|
||||
/* Fill in length of a previous callout and create an auto callout if
|
||||
required, except when the next thing is a quantifier or when processing a
|
||||
property substitution string for \w etc in UCP mode. */
|
||||
|
||||
if (!is_quantifier && previous_callout != NULL && cb->nestptr[0] == NULL &&
|
||||
after_manual_callout-- <= 0)
|
||||
if (!is_quantifier && cb->nestptr[0] == NULL)
|
||||
{
|
||||
if (previous_callout != NULL && after_manual_callout-- <= 0)
|
||||
{
|
||||
if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
|
||||
complete_callout(previous_callout, ptr, cb);
|
||||
previous_callout = NULL;
|
||||
}
|
||||
|
||||
/* Create auto callout, except for quantifiers, or while processing property
|
||||
strings that are substituted for \w etc in UCP mode. */
|
||||
|
||||
if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
|
||||
cb->nestptr[0] == NULL)
|
||||
if ((options & PCRE2_AUTO_CALLOUT) != 0)
|
||||
{
|
||||
previous_callout = code;
|
||||
code = auto_callout(code, ptr, cb);
|
||||
}
|
||||
}
|
||||
|
||||
/* Process the next pattern item. */
|
||||
|
||||
|
@ -5742,33 +5758,19 @@ for (;; ptr++)
|
|||
|
||||
|
||||
/* ===================================================================*/
|
||||
/* Start of nested parenthesized sub-expression, or comment or lookahead or
|
||||
lookbehind or option setting or condition or all the other extended
|
||||
parenthesis forms. We must save the current high-water-mark for the
|
||||
forward reference list so that we know where they start for this group.
|
||||
However, because the list may be extended when there are very many forward
|
||||
references (usually the result of a replicated inner group), we must use
|
||||
an offset rather than an absolute address. */
|
||||
/* Start of nested parenthesized sub-expression, or lookahead or lookbehind
|
||||
or option setting or condition or all the other extended parenthesis forms.
|
||||
We must save the current high-water-mark for the forward reference list so
|
||||
that we know where they start for this group. However, because the list may
|
||||
be extended when there are very many forward references (usually the result
|
||||
of a replicated inner group), we must use an offset rather than an absolute
|
||||
address. Note that (?# comments are dealt with at the top of the loop;
|
||||
they do not get this far. */
|
||||
|
||||
case CHAR_LEFT_PARENTHESIS:
|
||||
ptr++;
|
||||
|
||||
/* First deal with comments. Putting this code right at the start ensures
|
||||
that comments have no bad side effects. */
|
||||
|
||||
if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
|
||||
{
|
||||
ptr += 2;
|
||||
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR18;
|
||||
goto FAILED;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Now deal with various "verbs" that can be introduced by '*'. */
|
||||
/* Deal with various "verbs" that can be introduced by '*'. */
|
||||
|
||||
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|
||||
|| (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))
|
||||
|
|
|
@ -4689,4 +4689,12 @@ a)"xI
|
|||
|
||||
/a[b[:punct:]]/bincode
|
||||
|
||||
/L(?#(|++<!(2)?/B
|
||||
|
||||
/L(?#(|++<!(2)?/B,no_auto_possess
|
||||
|
||||
/L(?#(|++<!(2)?/B,auto_callout
|
||||
|
||||
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -1712,4 +1712,8 @@
|
|||
|
||||
/[[:^ascii:]a]/utf,ucp,bincode
|
||||
|
||||
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
|
||||
|
||||
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
|
||||
|
||||
# End of testinput5
|
||||
|
|
|
@ -14906,4 +14906,40 @@ Subject length lower bound = 0
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
L?+
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B,no_auto_possess
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
L?
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B,auto_callout
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Callout 255 0 14
|
||||
L?+
|
||||
Callout 255 14 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Callout 255 0 14
|
||||
L?
|
||||
Callout 255 14 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -4146,4 +4146,24 @@ No match
|
|||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Callout 255 0 14
|
||||
L?
|
||||
Callout 255 14 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
|
||||
------------------------------------------------------------------
|
||||
Bra
|
||||
Callout 255 0 14
|
||||
L?+
|
||||
Callout 255 14 0
|
||||
Ket
|
||||
End
|
||||
------------------------------------------------------------------
|
||||
|
||||
# End of testinput5
|
||||
|
|
Loading…
Reference in New Issue