Fix auto-callout with (?# comment bug.

This commit is contained in:
Philip.Hazel 2015-11-25 18:46:35 +00:00
parent 5075e74d8a
commit 2eb24e2dac
6 changed files with 113 additions and 39 deletions

View File

@ -330,6 +330,10 @@ and could fail to match.
98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
because it sets the "startoffset" parameter for pcre2_match().
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
an item and its quantifier (for example, A(?#comment)?B), pcre2_compile()
misbehaved.
Version 10.20 30-June-2015
--------------------------

View File

@ -4001,31 +4001,47 @@ for (;; ptr++)
}
}
/* Skip over (?# comments. We need to do this here because we want to know if
the next thing is a quantifier, and these comments may come between an item
and its quantifier. */
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
ptr[2] == CHAR_NUMBER_SIGN)
{
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR18;
goto FAILED;
}
continue;
}
/* See if the next thing is a quantifier. */
is_quantifier =
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
/* Fill in length of a previous callout, except when the next thing is a
quantifier or when processing a property substitution string in UCP mode. */
/* Fill in length of a previous callout and create an auto callout if
required, except when the next thing is a quantifier or when processing a
property substitution string for \w etc in UCP mode. */
if (!is_quantifier && previous_callout != NULL && cb->nestptr[0] == NULL &&
after_manual_callout-- <= 0)
if (!is_quantifier && cb->nestptr[0] == NULL)
{
if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
complete_callout(previous_callout, ptr, cb);
previous_callout = NULL;
}
if (previous_callout != NULL && after_manual_callout-- <= 0)
{
if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
complete_callout(previous_callout, ptr, cb);
previous_callout = NULL;
}
/* Create auto callout, except for quantifiers, or while processing property
strings that are substituted for \w etc in UCP mode. */
if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier &&
cb->nestptr[0] == NULL)
{
previous_callout = code;
code = auto_callout(code, ptr, cb);
if ((options & PCRE2_AUTO_CALLOUT) != 0)
{
previous_callout = code;
code = auto_callout(code, ptr, cb);
}
}
/* Process the next pattern item. */
@ -5742,33 +5758,19 @@ for (;; ptr++)
/* ===================================================================*/
/* Start of nested parenthesized sub-expression, or comment or lookahead or
lookbehind or option setting or condition or all the other extended
parenthesis forms. We must save the current high-water-mark for the
forward reference list so that we know where they start for this group.
However, because the list may be extended when there are very many forward
references (usually the result of a replicated inner group), we must use
an offset rather than an absolute address. */
/* Start of nested parenthesized sub-expression, or lookahead or lookbehind
or option setting or condition or all the other extended parenthesis forms.
We must save the current high-water-mark for the forward reference list so
that we know where they start for this group. However, because the list may
be extended when there are very many forward references (usually the result
of a replicated inner group), we must use an offset rather than an absolute
address. Note that (?# comments are dealt with at the top of the loop;
they do not get this far. */
case CHAR_LEFT_PARENTHESIS:
ptr++;
/* First deal with comments. Putting this code right at the start ensures
that comments have no bad side effects. */
if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
{
ptr += 2;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR18;
goto FAILED;
}
continue;
}
/* Now deal with various "verbs" that can be introduced by '*'. */
/* Deal with various "verbs" that can be introduced by '*'. */
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|| (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))

8
testdata/testinput2 vendored
View File

@ -4689,4 +4689,12 @@ a)"xI
/a[b[:punct:]]/bincode
/L(?#(|++<!(2)?/B
/L(?#(|++<!(2)?/B,no_auto_possess
/L(?#(|++<!(2)?/B,auto_callout
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
# End of testinput2

4
testdata/testinput5 vendored
View File

@ -1712,4 +1712,8 @@
/[[:^ascii:]a]/utf,ucp,bincode
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
# End of testinput5

36
testdata/testoutput2 vendored
View File

@ -14906,4 +14906,40 @@ Subject length lower bound = 0
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B
------------------------------------------------------------------
Bra
L?+
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,no_auto_possess
------------------------------------------------------------------
Bra
L?
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?+
Callout 255 14 0
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?
Callout 255 14 0
Ket
End
------------------------------------------------------------------
# End of testinput2

20
testdata/testoutput5 vendored
View File

@ -4146,4 +4146,24 @@ No match
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?
Callout 255 14 0
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?+
Callout 255 14 0
Ket
End
------------------------------------------------------------------
# End of testinput5