Fix minimum length bug for patterns containing (*ACCEPT).
This commit is contained in:
parent
3b2fa4dff2
commit
ef79b978a6
|
@ -31,7 +31,13 @@ minimum is potentially useful.
|
||||||
9. Some changes to the way the minimum subject length is handled:
|
9. Some changes to the way the minimum subject length is handled:
|
||||||
|
|
||||||
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
|
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
|
||||||
pcre2test omits this item instead of showing a value of zero.
|
pcre2test now omits this item instead of showing a value of zero.
|
||||||
|
|
||||||
|
* An incorrect minimum length could be calculated for a pattern that
|
||||||
|
contained (*ACCEPT) inside a quantified group whose minimum repetition was
|
||||||
|
zero, for example /A(?:(*ACCEPT))?B/, which incorrectly computed a minimum
|
||||||
|
of 2. The minimum length scan no longer happens for a pattern that
|
||||||
|
contains (*ACCEPT).
|
||||||
|
|
||||||
* When no minimum length is set by the normal scan, but a first and/or last
|
* When no minimum length is set by the normal scan, but a first and/or last
|
||||||
code unit is recorded, set the minimum to 1 or 2 as appropriate.
|
code unit is recorded, set the minimum to 1 or 2 as appropriate.
|
||||||
|
|
|
@ -10039,8 +10039,9 @@ re->max_lookbehind = cb.max_lookbehind;
|
||||||
|
|
||||||
if (cb.had_accept)
|
if (cb.had_accept)
|
||||||
{
|
{
|
||||||
reqcu = 0; /* Must disable after (*ACCEPT) */
|
reqcu = 0; /* Must disable after (*ACCEPT) */
|
||||||
reqcuflags = REQ_NONE;
|
reqcuflags = REQ_NONE;
|
||||||
|
re->flags |= PCRE2_HASACCEPT; /* Disables minimum length */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill in the final opcode and check for disastrous overflow. If no overflow,
|
/* Fill in the final opcode and check for disastrous overflow. If no overflow,
|
||||||
|
|
|
@ -517,6 +517,7 @@ bytes in a code unit in that mode. */
|
||||||
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
#define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */
|
||||||
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
|
#define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */
|
||||||
#define PCRE2_HASBKC 0x00400000 /* contains \C */
|
#define PCRE2_HASBKC 0x00400000 /* contains \C */
|
||||||
|
#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */
|
||||||
|
|
||||||
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
#define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32)
|
||||||
|
|
||||||
|
|
|
@ -1607,13 +1607,13 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the minimum length of subject string. If the pattern can match an empty
|
/* Find the minimum length of subject string. If the pattern can match an empty
|
||||||
string, the minimum length is already known. If there are more back references
|
string, the minimum length is already known. If the pattern contains (*ACCEPT),
|
||||||
than the size of the vector we are going to cache them in, do nothing. A
|
all bets are off. If there are more back references than the size of the vector
|
||||||
pattern that complicated will probably take a long time to analyze and may in
|
we are going to cache them in, do nothing. A pattern that complicated will
|
||||||
any case turn out to be too complicated. Note that back reference minima are
|
probably take a long time to analyze and may in any case turn out to be too
|
||||||
held as 16-bit numbers. */
|
complicated. Note that back reference minima are held as 16-bit numbers. */
|
||||||
|
|
||||||
if ((re->flags & PCRE2_MATCH_EMPTY) == 0 &&
|
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
|
||||||
re->top_backref <= MAX_CACHE_BACKREF)
|
re->top_backref <= MAX_CACHE_BACKREF)
|
||||||
{
|
{
|
||||||
int backref_cache[MAX_CACHE_BACKREF+1];
|
int backref_cache[MAX_CACHE_BACKREF+1];
|
||||||
|
|
|
@ -5623,4 +5623,6 @@ a)"xI
|
||||||
|
|
||||||
/((?=a))[abcd]/I
|
/((?=a))[abcd]/I
|
||||||
|
|
||||||
|
/A(?:(*ACCEPT))?B/info
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -17026,6 +17026,11 @@ Capture group count = 1
|
||||||
First code unit = 'a'
|
First code unit = 'a'
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 1
|
||||||
|
|
||||||
|
/A(?:(*ACCEPT))?B/info
|
||||||
|
Capture group count = 0
|
||||||
|
First code unit = 'A'
|
||||||
|
Subject length lower bound = 1
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue