Fix auto-anchor bug when .* is inside an assertion.
This commit is contained in:
parent
12a6d697fe
commit
4fd8feaa50
23
ChangeLog
23
ChangeLog
|
@ -29,14 +29,6 @@ some minor bugs and Perl incompatibilities were fixed, including:
|
||||||
existing subpattern.
|
existing subpattern.
|
||||||
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
|
(e) A conditional recursion test such as (?(R)...) misbehaved if there was a
|
||||||
group whose name began with "R".
|
group whose name began with "R".
|
||||||
(f) The amount of memory needed for a compiled pattern was miscalculated if a
|
|
||||||
lookbehind contained more than one toplevel branch and the first branch
|
|
||||||
was of length zero.
|
|
||||||
(g) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero-
|
|
||||||
terminated pattern, if a # comment ran on to the end of the pattern, one
|
|
||||||
or more code units past the end were being read.
|
|
||||||
(h) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
|
||||||
"{2,2") could cause reading beyond the pattern.
|
|
||||||
|
|
||||||
One effect of the refactoring is that some error numbers and messages have
|
One effect of the refactoring is that some error numbers and messages have
|
||||||
changed, and the pattern offset given for compiling errors is not always the
|
changed, and the pattern offset given for compiling errors is not always the
|
||||||
|
@ -61,6 +53,17 @@ Some bugs in the refactored code were subsequently fixed before release:
|
||||||
a lookup outside one of the global tables. A similar bug existed for wide
|
a lookup outside one of the global tables. A similar bug existed for wide
|
||||||
characters in *VERB names.
|
characters in *VERB names.
|
||||||
|
|
||||||
|
(d) The amount of memory needed for a compiled pattern was miscalculated if a
|
||||||
|
lookbehind contained more than one toplevel branch and the first branch
|
||||||
|
was of length zero.
|
||||||
|
|
||||||
|
(e) In UTF-8 or UTF-16 modes with PCRE2_EXTENDED (/x) set and a non-zero-
|
||||||
|
terminated pattern, if a # comment ran on to the end of the pattern, one
|
||||||
|
or more code units past the end were being read.
|
||||||
|
|
||||||
|
(f) An unterminated repeat at the end of a non-zero-terminated pattern (e.g.
|
||||||
|
"{2,2") could cause reading beyond the pattern.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
reference is by name, there is only one group of that name. The referenced
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
@ -122,6 +125,10 @@ library containing a test function that can be called by fuzzers to be
|
||||||
compiled. A non-installed binary to run the test function locally, called
|
compiled. A non-installed binary to run the test function locally, called
|
||||||
pcre2fuzzcheck is also compiled.
|
pcre2fuzzcheck is also compiled.
|
||||||
|
|
||||||
|
18. A pattern with PCRE2_DOTALL (/s) set but not PCRE2_NO_DOTSTAR_ANCHOR, and
|
||||||
|
which started with .* inside a positive lookahead, was incorrectly being
|
||||||
|
compiled as implicitly anchored.
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -7634,13 +7634,14 @@ Arguments:
|
||||||
the less precise approach
|
the less precise approach
|
||||||
cb points to the compile data block
|
cb points to the compile data block
|
||||||
atomcount atomic group level
|
atomcount atomic group level
|
||||||
|
inassert TRUE if in an assertion
|
||||||
|
|
||||||
Returns: TRUE or FALSE
|
Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static BOOL
|
static BOOL
|
||||||
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
|
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
|
||||||
int atomcount)
|
int atomcount, BOOL inassert)
|
||||||
{
|
{
|
||||||
do {
|
do {
|
||||||
PCRE2_SPTR scode = first_significant_code(
|
PCRE2_SPTR scode = first_significant_code(
|
||||||
|
@ -7652,7 +7653,8 @@ do {
|
||||||
if (op == OP_BRA || op == OP_BRAPOS ||
|
if (op == OP_BRA || op == OP_BRAPOS ||
|
||||||
op == OP_SBRA || op == OP_SBRAPOS)
|
op == OP_SBRA || op == OP_SBRAPOS)
|
||||||
{
|
{
|
||||||
if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
|
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Capturing brackets */
|
/* Capturing brackets */
|
||||||
|
@ -7662,33 +7664,44 @@ do {
|
||||||
{
|
{
|
||||||
int n = GET2(scode, 1+LINK_SIZE);
|
int n = GET2(scode, 1+LINK_SIZE);
|
||||||
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
||||||
if (!is_anchored(scode, new_map, cb, atomcount)) return FALSE;
|
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Positive forward assertions and conditions */
|
/* Positive forward assertion */
|
||||||
|
|
||||||
else if (op == OP_ASSERT || op == OP_COND)
|
else if (op == OP_ASSERT)
|
||||||
{
|
{
|
||||||
if (!is_anchored(scode, bracket_map, cb, atomcount)) return FALSE;
|
if (!is_anchored(scode, bracket_map, cb, atomcount, TRUE)) return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Condition */
|
||||||
|
|
||||||
|
else if (op == OP_COND)
|
||||||
|
{
|
||||||
|
if (!is_anchored(scode, bracket_map, cb, atomcount, inassert))
|
||||||
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Atomic groups */
|
/* Atomic groups */
|
||||||
|
|
||||||
else if (op == OP_ONCE || op == OP_ONCE_NC)
|
else if (op == OP_ONCE || op == OP_ONCE_NC)
|
||||||
{
|
{
|
||||||
if (!is_anchored(scode, bracket_map, cb, atomcount + 1))
|
if (!is_anchored(scode, bracket_map, cb, atomcount + 1, inassert))
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
|
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and
|
||||||
it isn't in brackets that are or may be referenced or inside an atomic
|
it isn't in brackets that are or may be referenced or inside an atomic
|
||||||
group. There is also an option that disables auto-anchoring. */
|
group or an assertion. Also the pattern must not contain *PRUNE or *SKIP,
|
||||||
|
because these break the feature. Consider, for example, /(?s).*?(*PRUNE)b/
|
||||||
|
with the subject "aab", which matches "b", i.e. not at the start of a line.
|
||||||
|
There is also an option that disables auto-anchoring. */
|
||||||
|
|
||||||
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR ||
|
||||||
op == OP_TYPEPOSSTAR))
|
op == OP_TYPEPOSSTAR))
|
||||||
{
|
{
|
||||||
if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
|
if (scode[1] != OP_ALLANY || (bracket_map & cb->backref_map) != 0 ||
|
||||||
atomcount > 0 || cb->had_pruneorskip ||
|
atomcount > 0 || cb->had_pruneorskip || inassert ||
|
||||||
(cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
|
(cb->external_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
@ -9423,7 +9436,7 @@ there are no occurrences of *PRUNE or *SKIP (though there is an option to
|
||||||
disable this case). */
|
disable this case). */
|
||||||
|
|
||||||
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||||
is_anchored(codestart, 0, &cb, 0))
|
is_anchored(codestart, 0, &cb, 0, FALSE))
|
||||||
re->overall_options |= PCRE2_ANCHORED;
|
re->overall_options |= PCRE2_ANCHORED;
|
||||||
|
|
||||||
/* If the pattern is still not anchored and we do not have a first code unit,
|
/* If the pattern is still not anchored and we do not have a first code unit,
|
||||||
|
|
|
@ -5812,4 +5812,7 @@ ef) x/x,mark
|
||||||
/(?=.*X)X$/
|
/(?=.*X)X$/
|
||||||
\ X
|
\ X
|
||||||
|
|
||||||
|
/(?s)(?=.*?)b/
|
||||||
|
aabc
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
|
@ -9285,4 +9285,8 @@ No match
|
||||||
\ X
|
\ X
|
||||||
0: X
|
0: X
|
||||||
|
|
||||||
|
/(?s)(?=.*?)b/
|
||||||
|
aabc
|
||||||
|
0: b
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
|
Loading…
Reference in New Issue