Small optimizations in pcre2_study.c

This commit is contained in:
Philip.Hazel 2015-11-11 09:42:26 +00:00
parent 47c21334cf
commit 7bf5d9efd0
6 changed files with 47 additions and 35 deletions

View File

@ -282,6 +282,8 @@ a factor of the size of the compiling workspace (it currently is).
81. Check for integer overflow in minimum length calculation and cap it at
65535.
82. Small optimizations in code for finding the minimum matching length.
Version 10.20 30-June-2015
--------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
.TH PCRE2API 3 "10 November 2015" "PCRE2 10.21"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -1684,8 +1684,11 @@ value, 0 is returned.
.sp
PCRE2_INFO_MATCHEMPTY
.sp
Return 1 if the pattern can match an empty string, otherwise 0. The third
argument should point to an \fBuint32_t\fP variable.
Return 1 if the pattern might match an empty string, otherwise 0. The third
argument should point to an \fBuint32_t\fP variable. When a pattern contains
recursive subroutine calls it is not always possible to determine whether or
not it can match an empty string. PCRE2 takes a cautious approach and returns 1
in such cases.
.sp
PCRE2_INFO_MATCHLIMIT
.sp
@ -3084,6 +3087,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 05 November 2015
Last updated: 10 November 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi

View File

@ -104,18 +104,21 @@ recurse_check this_recurse;
register int branchlength = 0;
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
/* If this is a "could be empty" group, its minimum length is 0. */
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
/* Skip over capturing bracket number */
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
/* A large and/or complex regex can take too long to process. */
if ((*countptr)++ > 1000) return -1;
/* Skip over capturing bracket number */
if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
/* Scan along the opcodes for this branch. If we get to the end of the
branch, check the length against that of the other branches. If the accumulated
length passes 16-bits, stop and return it. */
/* Scan along the opcodes for this branch. If we get to the end of the branch,
check the length against that of the other branches. If the accumulated length
passes 16-bits, stop. */
for (;;)
{
@ -1543,24 +1546,28 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
}
/* Find the minimum length of subject string. */
/* Find the minimum length of subject string. If it can match an empty string,
the minimum length is already known. */
switch(min = find_minlength(re, code, code, utf, NULL, &count))
if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
{
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
case -2:
return 2; /* missing capturing bracket */
case -3:
return 3; /* unrecognized opcode */
default:
if (min > UINT16_MAX) min = UINT16_MAX;
re->minlength = min;
break;
}
switch(min = find_minlength(re, code, code, utf, NULL, &count))
{
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
break; /* Leave minlength unchanged (will be zero) */
case -2:
return 2; /* missing capturing bracket */
case -3:
return 3; /* unrecognized opcode */
default:
if (min > UINT16_MAX) min = UINT16_MAX;
re->minlength = min;
break;
}
}
return 0;
}

View File

@ -252,7 +252,7 @@ Failed: error -52: nested recursion at the same subject position
/(a|(?R))/I
Capturing subpattern count = 1
May match empty string
Subject length lower bound = 1
Subject length lower bound = 0
abcd
0: a
1: a
@ -262,7 +262,7 @@ Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?R))))/I
Capturing subpattern count = 3
May match empty string
Subject length lower bound = 2
Subject length lower bound = 0
abcd
0: ab
1: ab
@ -272,7 +272,7 @@ Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?1))))/I
Capturing subpattern count = 3
May match empty string
Subject length lower bound = 2
Subject length lower bound = 0
abcd
0: ab
1: ab

View File

@ -416,7 +416,7 @@ Failed: error -46: JIT stack limit reached
/(a|(?R))/I
Capturing subpattern count = 1
May match empty string
Subject length lower bound = 1
Subject length lower bound = 0
JIT compilation was successful
abcd
0: a (JIT)
@ -427,7 +427,7 @@ Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?R))))/I
Capturing subpattern count = 3
May match empty string
Subject length lower bound = 2
Subject length lower bound = 0
JIT compilation was successful
abcd
0: ab (JIT)
@ -438,7 +438,7 @@ Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?1))))/I
Capturing subpattern count = 3
May match empty string
Subject length lower bound = 2
Subject length lower bound = 0
JIT compilation was successful
abcd
0: ab (JIT)

View File

@ -3960,7 +3960,7 @@ Subject length lower bound = 3
Capturing subpattern count = 2
Compile options: <none>
Overall options: anchored
Subject length lower bound = 3
Subject length lower bound = 2
a=a
0: a=a
1: a