Fix auto-callout bug with (?# comments.

This commit is contained in:
Philip.Hazel 2015-11-25 18:46:35 +00:00
parent 5075e74d8a
commit 2eb24e2dac
6 changed files with 113 additions and 39 deletions

View File

@ -330,6 +330,10 @@ and could fail to match.
98. In pcre2test, make the "startoffset" modifier a synonym of "offset", 98. In pcre2test, make the "startoffset" modifier a synonym of "offset",
because it sets the "startoffset" parameter for pcre2_match(). because it sets the "startoffset" parameter for pcre2_match().
99. If PCRE2_AUTO_CALLOUT was set on a pattern that had a (?# comment between
an item and its quantifier (for example, A(?#comment)?B), pcre2_compile()
misbehaved.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -4001,31 +4001,47 @@ for (;; ptr++)
} }
} }
/* Skip over (?# comments. We need to do this here because we want to know if
the next thing is a quantifier, and these comments may come between an item
and its quantifier. */
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
ptr[2] == CHAR_NUMBER_SIGN)
{
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR18;
goto FAILED;
}
continue;
}
/* See if the next thing is a quantifier. */ /* See if the next thing is a quantifier. */
is_quantifier = is_quantifier =
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK || c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
(c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1)); (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
/* Fill in length of a previous callout, except when the next thing is a /* Fill in length of a previous callout and create an auto callout if
quantifier or when processing a property substitution string in UCP mode. */ required, except when the next thing is a quantifier or when processing a
property substitution string for \w etc in UCP mode. */
if (!is_quantifier && previous_callout != NULL && cb->nestptr[0] == NULL && if (!is_quantifier && cb->nestptr[0] == NULL)
after_manual_callout-- <= 0)
{ {
if (lengthptr == NULL) /* Don't attempt in pre-compile phase */ if (previous_callout != NULL && after_manual_callout-- <= 0)
complete_callout(previous_callout, ptr, cb); {
previous_callout = NULL; if (lengthptr == NULL) /* Don't attempt in pre-compile phase */
} complete_callout(previous_callout, ptr, cb);
previous_callout = NULL;
}
/* Create auto callout, except for quantifiers, or while processing property if ((options & PCRE2_AUTO_CALLOUT) != 0)
strings that are substituted for \w etc in UCP mode. */ {
previous_callout = code;
if ((options & PCRE2_AUTO_CALLOUT) != 0 && !is_quantifier && code = auto_callout(code, ptr, cb);
cb->nestptr[0] == NULL) }
{
previous_callout = code;
code = auto_callout(code, ptr, cb);
} }
/* Process the next pattern item. */ /* Process the next pattern item. */
@ -5742,33 +5758,19 @@ for (;; ptr++)
/* ===================================================================*/ /* ===================================================================*/
/* Start of nested parenthesized sub-expression, or comment or lookahead or /* Start of nested parenthesized sub-expression, or lookahead or lookbehind
lookbehind or option setting or condition or all the other extended or option setting or condition or all the other extended parenthesis forms.
parenthesis forms. We must save the current high-water-mark for the We must save the current high-water-mark for the forward reference list so
forward reference list so that we know where they start for this group. that we know where they start for this group. However, because the list may
However, because the list may be extended when there are very many forward be extended when there are very many forward references (usually the result
references (usually the result of a replicated inner group), we must use of a replicated inner group), we must use an offset rather than an absolute
an offset rather than an absolute address. */ address. Note that (?# comments are dealt with at the top of the loop;
they do not get this far. */
case CHAR_LEFT_PARENTHESIS: case CHAR_LEFT_PARENTHESIS:
ptr++; ptr++;
/* First deal with comments. Putting this code right at the start ensures /* Deal with various "verbs" that can be introduced by '*'. */
that comments have no bad side effects. */
if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
{
ptr += 2;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR18;
goto FAILED;
}
continue;
}
/* Now deal with various "verbs" that can be introduced by '*'. */
if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':' if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
|| (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0)))) || (MAX_255(ptr[1]) && ((cb->ctypes[ptr[1]] & ctype_letter) != 0))))

8
testdata/testinput2 vendored
View File

@ -4689,4 +4689,12 @@ a)"xI
/a[b[:punct:]]/bincode /a[b[:punct:]]/bincode
/L(?#(|++<!(2)?/B
/L(?#(|++<!(2)?/B,no_auto_possess
/L(?#(|++<!(2)?/B,auto_callout
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
# End of testinput2 # End of testinput2

4
testdata/testinput5 vendored
View File

@ -1712,4 +1712,8 @@
/[[:^ascii:]a]/utf,ucp,bincode /[[:^ascii:]a]/utf,ucp,bincode
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
# End of testinput5 # End of testinput5

36
testdata/testoutput2 vendored
View File

@ -14906,4 +14906,40 @@ Subject length lower bound = 0
End End
------------------------------------------------------------------ ------------------------------------------------------------------
/L(?#(|++<!(2)?/B
------------------------------------------------------------------
Bra
L?+
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,no_auto_possess
------------------------------------------------------------------
Bra
L?
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?+
Callout 255 14 0
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,no_auto_possess,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?
Callout 255 14 0
Ket
End
------------------------------------------------------------------
# End of testinput2 # End of testinput2

20
testdata/testoutput5 vendored
View File

@ -4146,4 +4146,24 @@ No match
End End
------------------------------------------------------------------ ------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?
Callout 255 14 0
Ket
End
------------------------------------------------------------------
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
------------------------------------------------------------------
Bra
Callout 255 0 14
L?+
Callout 255 14 0
Ket
End
------------------------------------------------------------------
# End of testinput5 # End of testinput5