Small optimizations in pcre2_study.c

This commit is contained in:
Philip.Hazel 2015-11-11 09:42:26 +00:00
parent 47c21334cf
commit 7bf5d9efd0
6 changed files with 47 additions and 35 deletions

View File

@ -282,6 +282,8 @@ a factor of the size of the compiling workspace (it currently is).
81. Check for integer overflow in minimum length calculation and cap it at 81. Check for integer overflow in minimum length calculation and cap it at
65535. 65535.
82. Small optimizations in code for finding the minimum matching length.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21" .TH PCRE2API 3 "10 November 2015" "PCRE2 10.21"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
@ -1684,8 +1684,11 @@ value, 0 is returned.
.sp .sp
PCRE2_INFO_MATCHEMPTY PCRE2_INFO_MATCHEMPTY
.sp .sp
-Return 1 if the pattern can match an empty string, otherwise 0. The third
-argument should point to an \fBuint32_t\fP variable.
+Return 1 if the pattern might match an empty string, otherwise 0. The third
+argument should point to an \fBuint32_t\fP variable. When a pattern contains
+recursive subroutine calls it is not always possible to determine whether or
+not it can match an empty string. PCRE2 takes a cautious approach and returns 1
+in such cases.
.sp .sp
PCRE2_INFO_MATCHLIMIT PCRE2_INFO_MATCHLIMIT
.sp .sp
@ -3084,6 +3087,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 05 November 2015 Last updated: 10 November 2015
Copyright (c) 1997-2015 University of Cambridge. Copyright (c) 1997-2015 University of Cambridge.
.fi .fi

View File

@ -104,18 +104,21 @@ recurse_check this_recurse;
register int branchlength = 0; register int branchlength = 0;
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE; register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
/* If this is a "could be empty" group, its minimum length is 0. */
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
/* Skip over capturing bracket number */
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
/* A large and/or complex regex can take too long to process. */ /* A large and/or complex regex can take too long to process. */
if ((*countptr)++ > 1000) return -1; if ((*countptr)++ > 1000) return -1;
-/* Skip over capturing bracket number */
-
-if (*code == OP_CBRA || *code == OP_SCBRA ||
-    *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
-
-/* Scan along the opcodes for this branch. If we get to the end of the
-branch, check the length against that of the other branches. If the accumulated
-length passes 16-bits, stop and return it. */
+/* Scan along the opcodes for this branch. If we get to the end of the branch,
+check the length against that of the other branches. If the accumulated length
+passes 16-bits, stop. */
for (;;) for (;;)
{ {
@ -1543,24 +1546,28 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET; if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
} }
-/* Find the minimum length of subject string. */
-
-switch(min = find_minlength(re, code, code, utf, NULL, &count))
-  {
-  case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
-  break;    /* Leave minlength unchanged (will be zero) */
-
-  case -2:
-  return 2; /* missing capturing bracket */
-
-  case -3:
-  return 3; /* unrecognized opcode */
-
-  default:
-  if (min > UINT16_MAX) min = UINT16_MAX;
-  re->minlength = min;
-  break;
-  }
+/* Find the minimum length of subject string. If it can match an empty string,
+the minimum length is already known. */
+
+if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
+  {
+  switch(min = find_minlength(re, code, code, utf, NULL, &count))
+    {
+    case -1:  /* \C in UTF mode or (*ACCEPT) or over-complex regex */
+    break;    /* Leave minlength unchanged (will be zero) */
+
+    case -2:
+    return 2; /* missing capturing bracket */
+
+    case -3:
+    return 3; /* unrecognized opcode */
+
+    default:
+    if (min > UINT16_MAX) min = UINT16_MAX;
+    re->minlength = min;
+    break;
+    }
+  }
return 0; return 0;
} }

View File

@ -252,7 +252,7 @@ Failed: error -52: nested recursion at the same subject position
/(a|(?R))/I /(a|(?R))/I
Capturing subpattern count = 1 Capturing subpattern count = 1
May match empty string May match empty string
Subject length lower bound = 1 Subject length lower bound = 0
abcd abcd
0: a 0: a
1: a 1: a
@ -262,7 +262,7 @@ Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?R))))/I /(ab|(bc|(de|(?R))))/I
Capturing subpattern count = 3 Capturing subpattern count = 3
May match empty string May match empty string
Subject length lower bound = 2 Subject length lower bound = 0
abcd abcd
0: ab 0: ab
1: ab 1: ab
@ -272,7 +272,7 @@ Failed: error -52: nested recursion at the same subject position
/(ab|(bc|(de|(?1))))/I /(ab|(bc|(de|(?1))))/I
Capturing subpattern count = 3 Capturing subpattern count = 3
May match empty string May match empty string
Subject length lower bound = 2 Subject length lower bound = 0
abcd abcd
0: ab 0: ab
1: ab 1: ab

View File

@ -416,7 +416,7 @@ Failed: error -46: JIT stack limit reached
/(a|(?R))/I /(a|(?R))/I
Capturing subpattern count = 1 Capturing subpattern count = 1
May match empty string May match empty string
Subject length lower bound = 1 Subject length lower bound = 0
JIT compilation was successful JIT compilation was successful
abcd abcd
0: a (JIT) 0: a (JIT)
@ -427,7 +427,7 @@ Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?R))))/I /(ab|(bc|(de|(?R))))/I
Capturing subpattern count = 3 Capturing subpattern count = 3
May match empty string May match empty string
Subject length lower bound = 2 Subject length lower bound = 0
JIT compilation was successful JIT compilation was successful
abcd abcd
0: ab (JIT) 0: ab (JIT)
@ -438,7 +438,7 @@ Failed: error -46: JIT stack limit reached
/(ab|(bc|(de|(?1))))/I /(ab|(bc|(de|(?1))))/I
Capturing subpattern count = 3 Capturing subpattern count = 3
May match empty string May match empty string
Subject length lower bound = 2 Subject length lower bound = 0
JIT compilation was successful JIT compilation was successful
abcd abcd
0: ab (JIT) 0: ab (JIT)

View File

@ -3960,7 +3960,7 @@ Subject length lower bound = 3
Capturing subpattern count = 2 Capturing subpattern count = 2
Compile options: <none> Compile options: <none>
Overall options: anchored Overall options: anchored
Subject length lower bound = 3 Subject length lower bound = 2
a=a a=a
0: a=a 0: a=a
1: a 1: a