Fix auto-anchor bug when .* is inside an assertion.
This commit is contained in:
parent
12a6d697fe
commit
4fd8feaa50
25
ChangeLog
25
ChangeLog
|
@ -29,15 +29,7 @@ some minor bugs and Perl incompatibilities were fixed, including:
|
|||
existing subpattern.
|
||||
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
|
||||
group whose name began with "R".
|
||||
(f) The amount of memory needed for a compiled pattern was miscalculated if a
|
||||
lookbehind contained more than one toplevel branch and the first branch
|
||||
was of length zero.
|
||||
(g) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero-
|
||||
terminated pattern, if a # comment ran on to the end of the pattern, one
|
||||
or more code units past the end were being read.
|
||||
(h) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
||||
"{2,2") could cause reading beyond the pattern.
|
||||
|
||||
|
||||
One effect of the refactoring is that some error numbers and messages have
|
||||
changed, and the pattern offset given for compiling errors is not always the
|
||||
right-most character that has been read. In particular, for a variable-length
|
||||
|
@ -61,6 +53,17 @@ Some bugs in the refactored code were subsequently fixed before release:
|
|||
a lookup outside one of the global tables. A similar bug existed for wide
|
||||
characters in *VERB names.
|
||||
|
||||
(d) The amount of memory needed for a compiled pattern was miscalculated if a
|
||||
lookbehind contained more than one toplevel branch and the first branch
|
||||
was of length zero.
|
||||
|
||||
(e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero-
|
||||
terminated pattern, if a # comment ran on to the end of the pattern, one
|
||||
or more code units past the end were being read.
|
||||
|
||||
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
||||
"{2,2") could cause reading beyond the pattern.
|
||||
|
||||
4. Back references are now permitted in lookbehind assertions when there are
|
||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||
reference is by name, there is only one group of that name. The referenced
|
||||
|
@ -122,6 +125,10 @@ library containing a test function that can be called by fuzzers to be
|
|||
compiled. A non-installed binary to run the test function locally, called
|
||||
pcre2fuzzcheck is also compiled.
|
||||
|
||||
18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and
|
||||
which started with .* inside a positive lookahead, was incorrectly being
|
||||
compiled as implicitly anchored.
|
||||
|
||||
|
||||
Version 10.22 29-July-2016
|
||||
--------------------------
|
||||
|
|
|
@ -7634,13 +7634,14 @@ Arguments:
|
|||
the less precise approach
|
||||
cb points to the compile data block
|
||||
atomcount atomic group level
|
||||
inassert TRUE if in an assertion
|
||||
|
||||
Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
|
||||
int atomcount)
|
||||
int atomcount, BOOL inassert)
|
||||
{
|
||||
do {
|
||||
PCRE2_SPTR scode = first_significant_code(
|
||||
|
@ -7652,7 +7653,8 @@ do {
|
|||
if (op == OP_BRA || op == OP_BRAPOS ||
|
||||
op == OP_SBRA || op == OP_SBRAPOS)
|
||||
{
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Capturing brackets */
|
||||
|
@ -7662,33 +7664,44 @@ do {
|
|||
{
|
||||
int n = GET2(scode, 1+LINK_SIZE);
|
||||
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
||||
if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE;
|
||||
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
|
||||
}
|
||||
|
||||
/* Positive forward assertions and conditions */
|
||||
/* Positive forward assertion */
|
||||
|
||||
else if (op == OP_ASSERT || op == OP_COND)
|
||||
else if (op == OP_ASSERT)
|
||||
{
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
|
||||
}
|
||||
|
||||
/* Condition */
|
||||
|
||||
else if (op == OP_COND)
|
||||
{
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Atomic groups */
|
||||
|
||||
else if (op == OP_ONCE || op == OP_ONCE_NC)
|
||||
{
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount + 1))
|
||||
if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
|
||||
it isn't in brackets that are or may be referenced or inside an atomic
|
||||
group. There is also an option that disables auto-anchoring. */
|
||||
group or an assertion. Also the pattern must not contain *PRUNE or *SKIP,
|
||||
because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/
|
||||
with the subject "aab", which matches "b", i.e. not at the start of the subject.
|
||||
There is also an option that disables auto-anchoring. */
|
||||
|
||||
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
||||
op == OP_TYPEPOSSTAR))
|
||||
{
|
||||
if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
|
||||
atomcount > 0 || cb->had_pruneorskip ||
|
||||
atomcount > 0 || cb->had_pruneorskip || inassert ||
|
||||
(cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
|
||||
return FALSE;
|
||||
}
|
||||
|
@ -9423,7 +9436,7 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to
|
|||
disable this case). */
|
||||
|
||||
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||
is_anchored(codestart, 0, &cb, 0))
|
||||
is_anchored(codestart, 0, &cb, 0, FALSE))
|
||||
re->overall_options |= PCRE2_ANCHORED;
|
||||
|
||||
/* If the pattern is still not anchored and we do not have a first code unit,
|
||||
|
|
|
@ -5812,4 +5812,7 @@ ef) x/x,mark
|
|||
/(?=.*X)X$/
|
||||
\ X
|
||||
|
||||
/(?s)(?=.*?)b/
|
||||
aabc
|
||||
|
||||
# End of testinput1
|
||||
|
|
|
@ -9285,4 +9285,8 @@ No match
|
|||
\ X
|
||||
0: X
|
||||
|
||||
/(?s)(?=.*?)b/
|
||||
aabc
|
||||
0: b
|
||||
|
||||
# End of testinput1
|
||||
|
|
Loading…
Reference in New Issue