diff --git a/ChangeLog b/ChangeLog index 52f22f7..ea34c78 100644 --- a/ChangeLog +++ b/ChangeLog @@ -31,7 +31,13 @@ minimum is potentially useful. 9. Some changes to the way the minimum subject length is handled: * When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed; - pcre2test omits this item instead of showing a value of zero. + pcre2test now omits this item instead of showing a value of zero. + + * An incorrect minimum length could be calculated for a pattern that + contained (*ACCEPT) inside a quantified group whose minimum repetition was + zero, for example /A(?:(*ACCEPT))?B/, which incorrectly computed a minimum + of 2. The minimum length scan no longer happens for a pattern that + contains (*ACCEPT). * When no minimum length is set by the normal scan, but a first and/or last code unit is recorded, set the minimum to 1 or 2 as appropriate. diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index bf2403e..7fdcc16 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -10039,8 +10039,9 @@ re->max_lookbehind = cb.max_lookbehind; if (cb.had_accept) { - reqcu = 0; /* Must disable after (*ACCEPT) */ + reqcu = 0; /* Must disable after (*ACCEPT) */ reqcuflags = REQ_NONE; + re->flags |= PCRE2_HASACCEPT; /* Disables minimum length */ } /* Fill in the final opcode and check for disastrous overflow. If no overflow, diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 7fd8044..5f06ac4 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -517,6 +517,7 @@ bytes in a code unit in that mode. 
*/ #define PCRE2_HASBKPORX 0x00100000 /* contains \P, \p, or \X */ #define PCRE2_DUPCAPUSED 0x00200000 /* contains (?| */ #define PCRE2_HASBKC 0x00400000 /* contains \C */ +#define PCRE2_HASACCEPT 0x00800000 /* contains (*ACCEPT) */ #define PCRE2_MODE_MASK (PCRE2_MODE8 | PCRE2_MODE16 | PCRE2_MODE32) diff --git a/src/pcre2_study.c b/src/pcre2_study.c index a6790be..88588d9 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -1607,13 +1607,13 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0) } /* Find the minimum length of subject string. If the pattern can match an empty -string, the minimum length is already known. If there are more back references -than the size of the vector we are going to cache them in, do nothing. A -pattern that complicated will probably take a long time to analyze and may in -any case turn out to be too complicated. Note that back reference minima are -held as 16-bit numbers. */ +string, the minimum length is already known. If the pattern contains (*ACCEPT) +all bets are off. If there are more back references than the size of the vector +we are going to cache them in, do nothing. A pattern that complicated will +probably take a long time to analyze and may in any case turn out to be too +complicated. Note that back reference minima are held as 16-bit numbers. 
*/ -if ((re->flags & PCRE2_MATCH_EMPTY) == 0 && +if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 && re->top_backref <= MAX_CACHE_BACKREF) { int backref_cache[MAX_CACHE_BACKREF+1]; diff --git a/testdata/testinput2 b/testdata/testinput2 index 3c1e589..2b4ced0 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5623,4 +5623,6 @@ a)"xI /((?=a))[abcd]/I +/A(?:(*ACCEPT))?B/info + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 6a93525..b98e98d 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17026,6 +17026,11 @@ Capture group count = 1 First code unit = 'a' Subject length lower bound = 1 +/A(?:(*ACCEPT))?B/info +Capture group count = 0 +First code unit = 'A' +Subject length lower bound = 1 + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data