Another extension to minimum length calculation.
This commit is contained in:
parent
ead78198d1
commit
1ebc2c50cc
|
@ -42,6 +42,10 @@ minimum is potentially useful.
|
||||||
such references as not adding to the minimum length (which it should have
|
such references as not adding to the minimum length (which it should have
|
||||||
done all along).
|
done all along).
|
||||||
|
|
||||||
|
* Furthermore, the above action now happens only if the back reference is to
|
||||||
|
a group that exists more than once in a pattern instead of any back
|
||||||
|
reference in a pattern with duplicate numbers.
|
||||||
|
|
||||||
10. A (*MARK) value inside a successful condition was not being returned by the
|
10. A (*MARK) value inside a successful condition was not being returned by the
|
||||||
interpretive matcher (it was returned by JIT). This bug has been mended.
|
interpretive matcher (it was returned by JIT). This bug has been mended.
|
||||||
|
|
||||||
|
|
|
@ -134,7 +134,7 @@ for (;;)
|
||||||
int d, min, recno;
|
int d, min, recno;
|
||||||
PCRE2_UCHAR *cs, *ce;
|
PCRE2_UCHAR *cs, *ce;
|
||||||
PCRE2_UCHAR op = *cc;
|
PCRE2_UCHAR op = *cc;
|
||||||
|
|
||||||
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||||
|
|
||||||
switch (op)
|
switch (op)
|
||||||
|
@ -448,11 +448,13 @@ for (;;)
|
||||||
|
|
||||||
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
If PCRE2_MATCH_UNSET_BACKREF is set, a backreference to an unset bracket
|
||||||
matches an empty string (by default it causes a matching failure), so in
|
matches an empty string (by default it causes a matching failure), so in
|
||||||
that case we must set the minimum length to zero. */
|
that case we must set the minimum length to zero.
|
||||||
|
|
||||||
/* Duplicate named pattern back reference. We cannot reliably find a length
|
For backreferenes, if duplicate numbers are present in the pattern we check
|
||||||
for this if duplicate numbers are present in the pattern, so we set the
|
for a reference to a duplicate. If it is, we don't know which version will
|
||||||
length to zero here also. */
|
be referenced, so we have to set the minimum length to zero. */
|
||||||
|
|
||||||
|
/* Duplicate named pattern back reference. */
|
||||||
|
|
||||||
case OP_DNREF:
|
case OP_DNREF:
|
||||||
case OP_DNREFI:
|
case OP_DNREFI:
|
||||||
|
@ -479,30 +481,34 @@ for (;;)
|
||||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||||
if (cs == NULL) return -2;
|
if (cs == NULL) return -2;
|
||||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||||
if (cc > cs && cc < ce) /* Simple recursion */
|
|
||||||
|
dd = 0;
|
||||||
|
if (!dupcapused ||
|
||||||
|
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||||
{
|
{
|
||||||
dd = 0;
|
if (cc > cs && cc < ce) /* Simple recursion */
|
||||||
had_recurse = TRUE;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
recurse_check *r = recurses;
|
|
||||||
for (r = recurses; r != NULL; r = r->prev)
|
|
||||||
if (r->group == cs) break;
|
|
||||||
if (r != NULL) /* Mutual recursion */
|
|
||||||
{
|
{
|
||||||
dd = 0;
|
|
||||||
had_recurse = TRUE;
|
had_recurse = TRUE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this_recurse.prev = recurses;
|
recurse_check *r = recurses;
|
||||||
this_recurse.group = cs;
|
for (r = recurses; r != NULL; r = r->prev)
|
||||||
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
if (r->group == cs) break;
|
||||||
countptr, backref_cache);
|
if (r != NULL) /* Mutual recursion */
|
||||||
if (dd < 0) return dd;
|
{
|
||||||
|
had_recurse = TRUE;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this_recurse.prev = recurses; /* No recursion */
|
||||||
|
this_recurse.group = cs;
|
||||||
|
dd = find_minlength(re, cs, startcode, utf, &this_recurse,
|
||||||
|
countptr, backref_cache);
|
||||||
|
if (dd < 0) return dd;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
backref_cache[recno] = dd;
|
backref_cache[recno] = dd;
|
||||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||||
|
@ -518,8 +524,8 @@ for (;;)
|
||||||
cc += 1 + 2*IMM2_SIZE;
|
cc += 1 + 2*IMM2_SIZE;
|
||||||
goto REPEAT_BACK_REFERENCE;
|
goto REPEAT_BACK_REFERENCE;
|
||||||
|
|
||||||
/* Single back reference. We cannot find a length for this if duplicate
|
/* Single back reference by number. References by name are converted to by
|
||||||
numbers are present in the pattern. */
|
number when there is no duplication. */
|
||||||
|
|
||||||
case OP_REF:
|
case OP_REF:
|
||||||
case OP_REFI:
|
case OP_REFI:
|
||||||
|
@ -529,36 +535,40 @@ for (;;)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
d = 0;
|
||||||
|
|
||||||
|
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||||
{
|
{
|
||||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||||
if (cs == NULL) return -2;
|
if (cs == NULL) return -2;
|
||||||
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
do ce += GET(ce, 1); while (*ce == OP_ALT);
|
||||||
if (cc > cs && cc < ce) /* Simple recursion */
|
|
||||||
|
if (!dupcapused ||
|
||||||
|
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
|
||||||
{
|
{
|
||||||
d = 0;
|
if (cc > cs && cc < ce) /* Simple recursion */
|
||||||
had_recurse = TRUE;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
recurse_check *r = recurses;
|
|
||||||
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
|
||||||
if (r != NULL) /* Mutual recursion */
|
|
||||||
{
|
{
|
||||||
d = 0;
|
|
||||||
had_recurse = TRUE;
|
had_recurse = TRUE;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
this_recurse.prev = recurses;
|
recurse_check *r = recurses;
|
||||||
this_recurse.group = cs;
|
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
|
||||||
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
if (r != NULL) /* Mutual recursion */
|
||||||
backref_cache);
|
{
|
||||||
if (d < 0) return d;
|
had_recurse = TRUE;
|
||||||
|
}
|
||||||
|
else /* No recursion */
|
||||||
|
{
|
||||||
|
this_recurse.prev = recurses;
|
||||||
|
this_recurse.group = cs;
|
||||||
|
d = find_minlength(re, cs, startcode, utf, &this_recurse, countptr,
|
||||||
|
backref_cache);
|
||||||
|
if (d < 0) return d;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else d = 0;
|
|
||||||
|
|
||||||
backref_cache[recno] = d;
|
backref_cache[recno] = d;
|
||||||
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
for (i = backref_cache[0] + 1; i < recno; i++) backref_cache[i] = -1;
|
||||||
|
|
|
@ -557,4 +557,6 @@
|
||||||
|
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
|
|
||||||
|
/(*UTF)(?=\x{123})/I
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -447,4 +447,6 @@
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
/(*UTF)(?=\x{123})/I
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -5607,4 +5607,20 @@ a)"xI
|
||||||
|
|
||||||
/(*:\Q \E){5}/alt_verbnames
|
/(*:\Q \E){5}/alt_verbnames
|
||||||
|
|
||||||
|
/(?=abc)/I
|
||||||
|
|
||||||
|
/(?|(X)|(XY))\1abc/I
|
||||||
|
|
||||||
|
/(?|(a)|(bcde))(c)\2/I
|
||||||
|
|
||||||
|
/(?|(a)|(bcde))(c)\1/I
|
||||||
|
|
||||||
|
/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames
|
||||||
|
|
||||||
|
/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames
|
||||||
|
|
||||||
|
/((a|)+)+Z/I
|
||||||
|
|
||||||
|
/((?=a))[abcd]/I
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1757,4 +1757,13 @@ No match
|
||||||
|
|
||||||
# -------------------------------------
|
# -------------------------------------
|
||||||
|
|
||||||
|
/(*UTF)(?=\x{123})/I
|
||||||
|
Capture group count = 0
|
||||||
|
May match empty string
|
||||||
|
Compile options: <none>
|
||||||
|
Overall options: utf
|
||||||
|
First code unit = \xc4
|
||||||
|
Last code unit = \xa3
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -1579,4 +1579,12 @@ No match
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
/(*UTF)(?=\x{123})/I
|
||||||
|
Capture group count = 0
|
||||||
|
May match empty string
|
||||||
|
Compile options: <none>
|
||||||
|
Overall options: utf
|
||||||
|
First code unit = \x{123}
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1577,4 +1577,12 @@ No match
|
||||||
|
|
||||||
# ----------------------------------------------------
|
# ----------------------------------------------------
|
||||||
|
|
||||||
|
/(*UTF)(?=\x{123})/I
|
||||||
|
Capture group count = 0
|
||||||
|
May match empty string
|
||||||
|
Compile options: <none>
|
||||||
|
Overall options: utf
|
||||||
|
First code unit = \x{123}
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -16963,6 +16963,69 @@ Failed: error 109 at offset 5: quantifier does not follow a repeatable item
|
||||||
/(*:\Q \E){5}/alt_verbnames
|
/(*:\Q \E){5}/alt_verbnames
|
||||||
Failed: error 109 at offset 11: quantifier does not follow a repeatable item
|
Failed: error 109 at offset 11: quantifier does not follow a repeatable item
|
||||||
|
|
||||||
|
/(?=abc)/I
|
||||||
|
Capture group count = 0
|
||||||
|
May match empty string
|
||||||
|
First code unit = 'a'
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/(?|(X)|(XY))\1abc/I
|
||||||
|
Capture group count = 1
|
||||||
|
Max back reference = 1
|
||||||
|
First code unit = 'X'
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 4
|
||||||
|
|
||||||
|
/(?|(a)|(bcde))(c)\2/I
|
||||||
|
Capture group count = 2
|
||||||
|
Max back reference = 2
|
||||||
|
Starting code units: a b
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/(?|(a)|(bcde))(c)\1/I
|
||||||
|
Capture group count = 2
|
||||||
|
Max back reference = 1
|
||||||
|
Starting code units: a b
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'B'(?'A')/I,dupnames
|
||||||
|
Capture group count = 3
|
||||||
|
Max back reference = 2
|
||||||
|
Named capture groups:
|
||||||
|
A 1
|
||||||
|
A 3
|
||||||
|
B 2
|
||||||
|
Options: dupnames
|
||||||
|
Starting code units: a b
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 3
|
||||||
|
|
||||||
|
/(?|(?'A'a)|(?'A'bcde))(?'B'c)\k'A'(?'A')/I,dupnames
|
||||||
|
Capture group count = 3
|
||||||
|
Max back reference = 3
|
||||||
|
Named capture groups:
|
||||||
|
A 1
|
||||||
|
A 3
|
||||||
|
B 2
|
||||||
|
Options: dupnames
|
||||||
|
Starting code units: a b
|
||||||
|
Last code unit = 'c'
|
||||||
|
Subject length lower bound = 2
|
||||||
|
|
||||||
|
/((a|)+)+Z/I
|
||||||
|
Capture group count = 2
|
||||||
|
Starting code units: Z a
|
||||||
|
Last code unit = 'Z'
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/((?=a))[abcd]/I
|
||||||
|
Capture group count = 1
|
||||||
|
First code unit = 'a'
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue