Small optimizations in pcre2_study.c
This commit is contained in:
parent
47c21334cf
commit
7bf5d9efd0
|
@ -282,6 +282,8 @@ a factor of the size of the compiling workspace (it currently is).
|
|||
81. Check for integer overflow in minimum length calculation and cap it at
|
||||
65535.
|
||||
|
||||
82. Small optimizations in code for finding the minimum matching length.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
|
||||
.TH PCRE2API 3 "10 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1684,8 +1684,11 @@ value, 0 is returned.
|
|||
.sp
|
||||
PCRE2_INFO_MATCHEMPTY
|
||||
.sp
|
||||
Return 1 if the pattern can match an empty string, otherwise 0. The third
|
||||
argument should point to an \fBuint32_t\fP variable.
|
||||
Return 1 if the pattern might match an empty string, otherwise 0. The third
|
||||
argument should point to an \fBuint32_t\fP variable. When a pattern contains
|
||||
recursive subroutine calls, it is not always possible to determine whether or
|
||||
not it can match an empty string. PCRE2 takes a cautious approach and returns 1
|
||||
in such cases.
|
||||
.sp
|
||||
PCRE2_INFO_MATCHLIMIT
|
||||
.sp
|
||||
|
@ -3084,6 +3087,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 05 November 2015
|
||||
Last updated: 10 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -104,18 +104,21 @@ recurse_check this_recurse;
|
|||
register int branchlength = 0;
|
||||
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||
|
||||
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||
|
||||
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
|
||||
|
||||
/* Skip over capturing bracket number */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
/* A large and/or complex regex can take too long to process. */
|
||||
|
||||
if ((*countptr)++ > 1000) return -1;
|
||||
|
||||
/* Skip over capturing bracket number */
|
||||
|
||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
||||
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the
|
||||
branch, check the length against that of the other branches. If the accumulated
|
||||
length passes 16-bits, stop and return it. */
|
||||
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||
check the length against that of the other branches. If the accumulated length
|
||||
passes 16 bits, stop. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
@ -1543,23 +1546,27 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
|
||||
}
|
||||
|
||||
/* Find the minimum length of subject string. */
|
||||
/* Find the minimum length of subject string. If the pattern can match an empty string,
|
||||
the minimum length is already known. */
|
||||
|
||||
switch(min = find_minlength(re, code, code, utf, NULL, &count))
|
||||
if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
switch(min = find_minlength(re, code, code, utf, NULL, &count))
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
||||
break; /* Leave minlength unchanged (will be zero) */
|
||||
|
||||
case -2:
|
||||
return 2; /* missing capturing bracket */
|
||||
case -2:
|
||||
return 2; /* missing capturing bracket */
|
||||
|
||||
case -3:
|
||||
return 3; /* unrecognized opcode */
|
||||
case -3:
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
default:
|
||||
if (min > UINT16_MAX) min = UINT16_MAX;
|
||||
re->minlength = min;
|
||||
break;
|
||||
default:
|
||||
if (min > UINT16_MAX) min = UINT16_MAX;
|
||||
re->minlength = min;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -252,7 +252,7 @@ Failed: error -52: nested recursion at the same subject position
|
|||
/(a|(?R))/I
|
||||
Capturing subpattern count = 1
|
||||
May match empty string
|
||||
Subject length lower bound = 1
|
||||
Subject length lower bound = 0
|
||||
abcd
|
||||
0: a
|
||||
1: a
|
||||
|
@ -262,7 +262,7 @@ Failed: error -52: nested recursion at the same subject position
|
|||
/(ab|(bc|(de|(?R))))/I
|
||||
Capturing subpattern count = 3
|
||||
May match empty string
|
||||
Subject length lower bound = 2
|
||||
Subject length lower bound = 0
|
||||
abcd
|
||||
0: ab
|
||||
1: ab
|
||||
|
@ -272,7 +272,7 @@ Failed: error -52: nested recursion at the same subject position
|
|||
/(ab|(bc|(de|(?1))))/I
|
||||
Capturing subpattern count = 3
|
||||
May match empty string
|
||||
Subject length lower bound = 2
|
||||
Subject length lower bound = 0
|
||||
abcd
|
||||
0: ab
|
||||
1: ab
|
||||
|
|
|
@ -416,7 +416,7 @@ Failed: error -46: JIT stack limit reached
|
|||
/(a|(?R))/I
|
||||
Capturing subpattern count = 1
|
||||
May match empty string
|
||||
Subject length lower bound = 1
|
||||
Subject length lower bound = 0
|
||||
JIT compilation was successful
|
||||
abcd
|
||||
0: a (JIT)
|
||||
|
@ -427,7 +427,7 @@ Failed: error -46: JIT stack limit reached
|
|||
/(ab|(bc|(de|(?R))))/I
|
||||
Capturing subpattern count = 3
|
||||
May match empty string
|
||||
Subject length lower bound = 2
|
||||
Subject length lower bound = 0
|
||||
JIT compilation was successful
|
||||
abcd
|
||||
0: ab (JIT)
|
||||
|
@ -438,7 +438,7 @@ Failed: error -46: JIT stack limit reached
|
|||
/(ab|(bc|(de|(?1))))/I
|
||||
Capturing subpattern count = 3
|
||||
May match empty string
|
||||
Subject length lower bound = 2
|
||||
Subject length lower bound = 0
|
||||
JIT compilation was successful
|
||||
abcd
|
||||
0: ab (JIT)
|
||||
|
|
|
@ -3960,7 +3960,7 @@ Subject length lower bound = 3
|
|||
Capturing subpattern count = 2
|
||||
Compile options: <none>
|
||||
Overall options: anchored
|
||||
Subject length lower bound = 3
|
||||
Subject length lower bound = 2
|
||||
a=a
|
||||
0: a=a
|
||||
1: a
|
||||
|
|
Loading…
Reference in New Issue