Improve minimum length finder in the presence of back references when there are
multiple groups with the same number.
This commit is contained in:
parent
0d1ab8515f
commit
ead78198d1
|
@ -31,11 +31,17 @@ minimum is potentially useful.
|
||||||
9. Some changes to the way the minimum subject length is handled:
|
9. Some changes to the way the minimum subject length is handled:
|
||||||
|
|
||||||
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
|
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
|
||||||
pcre2test no longer shows a value (of zero).
|
pcre2test omits this item instead of showing a value of zero.
|
||||||
|
|
||||||
* When no minimum length is set by the normal scan, but a first and/or last
|
* When no minimum length is set by the normal scan, but a first and/or last
|
||||||
code unit is recorded, set the minimum to 1 or 2 as appropriate.
|
code unit is recorded, set the minimum to 1 or 2 as appropriate.
|
||||||
|
|
||||||
|
* When a pattern contains multiple groups with the same number, a back
|
||||||
|
reference does not identify which one to scan for a minimum length. This used to
|
||||||
|
cause the minimum length finder to give up with no result. Now it treats
|
||||||
|
such references as not adding to the minimum length (which it should have
|
||||||
|
done all along).
|
||||||
|
|
||||||
10. A (*MARK) value inside a successful condition was not being returned by the
|
10. A (*MARK) value inside a successful condition was not being returned by the
|
||||||
interpretive matcher (it was returned by JIT). This bug has been mended.
|
interpretive matcher (it was returned by JIT). This bug has been mended.
|
||||||
|
|
||||||
|
|
|
@ -92,7 +92,6 @@ Returns: the minimum length
|
||||||
-1 \C in UTF-8 mode
|
-1 \C in UTF-8 mode
|
||||||
or (*ACCEPT)
|
or (*ACCEPT)
|
||||||
or pattern too complicated
|
or pattern too complicated
|
||||||
or back reference to duplicate name/number
|
|
||||||
-2 internal error (missing capturing bracket)
|
-2 internal error (missing capturing bracket)
|
||||||
-3 internal error (opcode not listed)
|
-3 internal error (opcode not listed)
|
||||||
*/
|
*/
|
||||||
|
@ -135,7 +134,7 @@ for (;;)
|
||||||
int d, min, recno;
|
int d, min, recno;
|
||||||
PCRE2_UCHAR *cs, *ce;
|
PCRE2_UCHAR *cs, *ce;
|
||||||
PCRE2_UCHAR op = *cc;
|
PCRE2_UCHAR op = *cc;
|
||||||
|
|
||||||
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||||
|
|
||||||
switch (op)
|
switch (op)
|
||||||
|
@ -452,12 +451,12 @@ for (;;)
|
||||||
that case we must set the minimum length to zero. */
|
that case we must set the minimum length to zero. */
|
||||||
|
|
||||||
/* Duplicate named pattern back reference. We cannot reliably find a length
|
/* Duplicate named pattern back reference. We cannot reliably find a length
|
||||||
for this if duplicate numbers are present in the pattern. */
|
for this if duplicate numbers are present in the pattern, so we set the
|
||||||
|
length to zero here also. */
|
||||||
|
|
||||||
case OP_DNREF:
|
case OP_DNREF:
|
||||||
case OP_DNREFI:
|
case OP_DNREFI:
|
||||||
if (dupcapused) return -1;
|
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
|
||||||
{
|
{
|
||||||
int count = GET2(cc, 1+IMM2_SIZE);
|
int count = GET2(cc, 1+IMM2_SIZE);
|
||||||
PCRE2_UCHAR *slot =
|
PCRE2_UCHAR *slot =
|
||||||
|
@ -524,14 +523,13 @@ for (;;)
|
||||||
|
|
||||||
case OP_REF:
|
case OP_REF:
|
||||||
case OP_REFI:
|
case OP_REFI:
|
||||||
if (dupcapused) return -1;
|
|
||||||
recno = GET2(cc, 1);
|
recno = GET2(cc, 1);
|
||||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||||
d = backref_cache[recno];
|
d = backref_cache[recno];
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||||
{
|
{
|
||||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||||
if (cs == NULL) return -2;
|
if (cs == NULL) return -2;
|
||||||
|
|
|
@ -14607,7 +14607,7 @@ Subject length lower bound = 65535
|
||||||
Capture group count = 1
|
Capture group count = 1
|
||||||
Max back reference = 1
|
Max back reference = 1
|
||||||
Starting code units: a b
|
Starting code units: a b
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 1
|
||||||
|
|
||||||
/(?|(aaa)|(b))(?1)/I
|
/(?|(aaa)|(b))(?1)/I
|
||||||
Capture group count = 1
|
Capture group count = 1
|
||||||
|
@ -14625,7 +14625,7 @@ Max back reference = 1
|
||||||
Named capture groups:
|
Named capture groups:
|
||||||
a 1
|
a 1
|
||||||
Starting code units: a b
|
Starting code units: a b
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 1
|
||||||
|
|
||||||
/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames
|
/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames
|
||||||
Capture group count = 2
|
Capture group count = 2
|
||||||
|
@ -14636,7 +14636,7 @@ Named capture groups:
|
||||||
Options: dupnames
|
Options: dupnames
|
||||||
Starting code units: a b
|
Starting code units: a b
|
||||||
Last code unit = 'c'
|
Last code unit = 'c'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 5
|
||||||
|
|
||||||
/ab{3cd/
|
/ab{3cd/
|
||||||
ab{3cd
|
ab{3cd
|
||||||
|
|
Loading…
Reference in New Issue