Fix empty \Q\E between an item and a quantifier in auto-callout mode.
This commit is contained in:
parent
1b38451847
commit
12fc152074
|
@ -349,6 +349,10 @@ was set when the pmatch argument was NULL. It now returns REG_INVARG.
|
||||||
|
|
||||||
104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep.
|
104. Allow for up to 32-bit numbers in the ordin() function in pcre2grep.
|
||||||
|
|
||||||
|
105. An empty \Q\E sequence between an item and its quantifier caused
|
||||||
|
pcre2_compile() to misbehave when auto callouts were enabled. This bug
|
||||||
|
was found by the LLVM fuzzer.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -3947,8 +3947,16 @@ for (;; ptr++)
|
||||||
last_code = code;
|
last_code = code;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If in \Q...\E, check for the end; if not, we have a literal. If not in
|
/* Before doing anything else we must handle all the special items that do
|
||||||
\Q...\E, an isolated \E is ignored. */
|
nothing, and which may come between an item and its quantifier. Otherwise,
|
||||||
|
when auto-callouts are enabled, a callout gets incorrectly inserted before
|
||||||
|
the quantifier is recognized. After recognizing a "do nothing" item, restart
|
||||||
|
the loop in case another one follows. */
|
||||||
|
|
||||||
|
/* If c is not NULL we are not at the end of the pattern. If it is NULL, we
|
||||||
|
may still be in the pattern with a NULL data item. In these cases, if we are
|
||||||
|
in \Q...\E, check for the \E that ends the literal string; if not, we have a
|
||||||
|
literal character. If not in \Q...\E, an isolated \E is ignored. */
|
||||||
|
|
||||||
if (c != CHAR_NULL || ptr < cb->end_pattern)
|
if (c != CHAR_NULL || ptr < cb->end_pattern)
|
||||||
{
|
{
|
||||||
|
@ -3958,7 +3966,7 @@ for (;; ptr++)
|
||||||
ptr++;
|
ptr++;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (inescq)
|
else if (inescq) /* Literal character */
|
||||||
{
|
{
|
||||||
if (previous_callout != NULL)
|
if (previous_callout != NULL)
|
||||||
{
|
{
|
||||||
|
@ -3973,17 +3981,27 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
goto NORMAL_CHAR;
|
goto NORMAL_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check for the start of a \Q...\E sequence. We must do this here rather
|
||||||
|
than later in case it is immediately followed by \E, which turns it into a
|
||||||
|
"do nothing" sequence. */
|
||||||
|
|
||||||
|
if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
|
||||||
|
{
|
||||||
|
inescq = TRUE;
|
||||||
|
ptr++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* In extended mode, skip white space and comments. We need a loop in order
|
/* In extended mode, skip white space and #-comments that end at newline. */
|
||||||
to check for more white space and more comments after a comment. */
|
|
||||||
|
|
||||||
if ((options & PCRE2_EXTENDED) != 0)
|
if ((options & PCRE2_EXTENDED) != 0)
|
||||||
{
|
{
|
||||||
for (;;)
|
PCRE2_SPTR wscptr = ptr;
|
||||||
{
|
|
||||||
while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
|
while (MAX_255(c) && (cb->ctypes[c] & ctype_space) != 0) c = *(++ptr);
|
||||||
if (c != CHAR_NUMBER_SIGN) break;
|
if (c == CHAR_NUMBER_SIGN)
|
||||||
|
{
|
||||||
ptr++;
|
ptr++;
|
||||||
while (*ptr != CHAR_NULL)
|
while (*ptr != CHAR_NULL)
|
||||||
{
|
{
|
||||||
|
@ -3997,13 +4015,19 @@ for (;; ptr++)
|
||||||
if (utf) FORWARDCHAR(ptr);
|
if (utf) FORWARDCHAR(ptr);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
c = *ptr; /* Either NULL or the char after a newline */
|
}
|
||||||
|
|
||||||
|
/* If we skipped any characters, restart the loop. Otherwise, we didn't see
|
||||||
|
a comment. */
|
||||||
|
|
||||||
|
if (ptr > wscptr)
|
||||||
|
{
|
||||||
|
ptr--;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip over (?# comments. We need to do this here because we want to know if
|
/* Skip over (?# comments. */
|
||||||
the next thing is a quantifier, and these comments may come between an item
|
|
||||||
and its quantifier. */
|
|
||||||
|
|
||||||
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
|
if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
|
||||||
ptr[2] == CHAR_NUMBER_SIGN)
|
ptr[2] == CHAR_NUMBER_SIGN)
|
||||||
|
@ -4018,7 +4042,8 @@ for (;; ptr++)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* See if the next thing is a quantifier. */
|
/* End of processing "do nothing" items. See if the next thing is a
|
||||||
|
quantifier. */
|
||||||
|
|
||||||
is_quantifier =
|
is_quantifier =
|
||||||
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
|
c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
|
||||||
|
@ -7133,7 +7158,10 @@ for (;; ptr++)
|
||||||
are negative the reference number. Only back references and those types
|
are negative the reference number. Only back references and those types
|
||||||
that consume a character may be repeated. We can test for values between
|
that consume a character may be repeated. We can test for values between
|
||||||
ESC_b and ESC_Z for the latter; this may have to change if any new ones are
|
ESC_b and ESC_Z for the latter; this may have to change if any new ones are
|
||||||
ever created. */
|
ever created.
|
||||||
|
|
||||||
|
Note: \Q and \E are handled at the start of the character-processing loop,
|
||||||
|
not here. */
|
||||||
|
|
||||||
case CHAR_BACKSLASH:
|
case CHAR_BACKSLASH:
|
||||||
tempptr = ptr;
|
tempptr = ptr;
|
||||||
|
@ -7145,16 +7173,6 @@ for (;; ptr++)
|
||||||
c = ec;
|
c = ec;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (escape == ESC_Q) /* Handle start of quoted string */
|
|
||||||
{
|
|
||||||
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
|
|
||||||
ptr += 2; /* avoid empty string */
|
|
||||||
else inescq = TRUE;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (escape == ESC_E) continue; /* Perl ignores an orphan \E */
|
|
||||||
|
|
||||||
/* For metasequences that actually match a character, we disable the
|
/* For metasequences that actually match a character, we disable the
|
||||||
setting of a first character if it hasn't already been set. */
|
setting of a first character if it hasn't already been set. */
|
||||||
|
|
||||||
|
|
|
@ -4699,4 +4699,7 @@ a)"xI
|
||||||
|
|
||||||
/(A*)\E+/B,auto_callout
|
/(A*)\E+/B,auto_callout
|
||||||
|
|
||||||
|
/()\Q\E*]/B,auto_callout
|
||||||
|
a[bc]d
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14956,4 +14956,27 @@ Subject length lower bound = 0
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
/()\Q\E*]/B,auto_callout
|
||||||
|
------------------------------------------------------------------
|
||||||
|
Bra
|
||||||
|
Callout 255 0 7
|
||||||
|
Brazero
|
||||||
|
SCBra 1
|
||||||
|
Callout 255 1 0
|
||||||
|
KetRmax
|
||||||
|
Callout 255 7 1
|
||||||
|
]
|
||||||
|
Callout 255 8 0
|
||||||
|
Ket
|
||||||
|
End
|
||||||
|
------------------------------------------------------------------
|
||||||
|
a[bc]d
|
||||||
|
--->a[bc]d
|
||||||
|
+0 ^ ()\Q\E*
|
||||||
|
+1 ^ )
|
||||||
|
+7 ^ ]
|
||||||
|
+8 ^^
|
||||||
|
0: ]
|
||||||
|
1:
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
Loading…
Reference in New Issue