Minor code and comment tidies.

This commit is contained in:
Philip.Hazel 2019-06-19 16:39:18 +00:00
parent da5155fed3
commit 9c53b6b11a
1 changed files with 20 additions and 15 deletions

View File

@ -88,6 +88,9 @@ Arguments:
countptr pointer to call count (to catch over complexity)
backref_cache vector for caching back references.
This function is no longer called when the pattern contains (*ACCEPT); however,
the old code for returning -1 is retained, just in case.
Returns: the minimum length
         -1 \C in UTF-8 mode
            or (*ACCEPT)
@ -205,7 +208,9 @@ for (;;)
cc += 1 + LINK_SIZE; cc += 1 + LINK_SIZE;
break; break;
-/* ACCEPT makes things far too complicated; we have to give up. */
+/* ACCEPT makes things far too complicated; we have to give up. In fact,
+from 10.34 onwards, if a pattern contains (*ACCEPT), this function is not
+used. However, leave the code in place, just in case. */
case OP_ACCEPT: case OP_ACCEPT:
case OP_ASSERT_ACCEPT: case OP_ASSERT_ACCEPT:
@ -1585,7 +1590,6 @@ Returns: 0 normally; non-zero should never normally occur
int int
PRIV(study)(pcre2_real_code *re) PRIV(study)(pcre2_real_code *re)
{ {
int min;
int count = 0; int count = 0;
PCRE2_UCHAR *code; PCRE2_UCHAR *code;
BOOL utf = (re->overall_options & PCRE2_UTF) != 0; BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
@ -1608,20 +1612,22 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
 /* Find the minimum length of subject string. If the pattern can match an empty
 string, the minimum length is already known. If the pattern contains (*ACCEPT)
-all bets are off. If there are more back references than the size of the vector
-we are going to cache them in, do nothing. A pattern that complicated will
-probably take a long time to analyze and may in any case turn out to be too
-complicated. Note that back reference minima are held as 16-bit numbers. */
+all bets are off, and we don't even try to find a minimum length. If there are
+more back references than the size of the vector we are going to cache them in,
+do nothing. A pattern that complicated will probably take a long time to
+analyze and may in any case turn out to be too complicated. Note that back
+reference minima are held as 16-bit numbers. */
if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 && if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
re->top_backref <= MAX_CACHE_BACKREF) re->top_backref <= MAX_CACHE_BACKREF)
{ {
int min;
int backref_cache[MAX_CACHE_BACKREF+1]; int backref_cache[MAX_CACHE_BACKREF+1];
backref_cache[0] = 0; /* Highest one that is set */ backref_cache[0] = 0; /* Highest one that is set */
min = find_minlength(re, code, code, utf, NULL, &count, backref_cache); min = find_minlength(re, code, code, utf, NULL, &count, backref_cache);
switch(min) switch(min)
{ {
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */ case -1: /* \C in UTF mode or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */ break; /* Leave minlength unchanged (will be zero) */
case -2: case -2:
@ -1631,8 +1637,7 @@ if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
return 3; /* unrecognized opcode */ return 3; /* unrecognized opcode */
default: default:
if (min > UINT16_MAX) min = UINT16_MAX; re->minlength = (min > UINT16_MAX)? UINT16_MAX : min;
re->minlength = min;
break; break;
} }
} }