Small optimizations in pcre2_study.c
This commit is contained in:
parent
47c21334cf
commit
7bf5d9efd0
|
@ -282,6 +282,8 @@ a factor of the size of the compiling workspace (it currently is).
|
||||||
81. Check for integer overflow in minimum length calculation and cap it at
|
81. Check for integer overflow in minimum length calculation and cap it at
|
||||||
65535.
|
65535.
|
||||||
|
|
||||||
|
82. Small optimizations in code for finding the minimum matching length.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "10 November 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1684,8 +1684,11 @@ value, 0 is returned.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_MATCHEMPTY
|
PCRE2_INFO_MATCHEMPTY
|
||||||
.sp
|
.sp
|
||||||
Return 1 if the pattern can match an empty string, otherwise 0. The third
|
Return 1 if the pattern might match an empty string, otherwise 0. The third
|
||||||
argument should point to an \fBuint32_t\fP variable.
|
argument should point to an \fBuint32_t\fP variable. When a pattern contains
|
||||||
|
recursive subroutine calls it is not always possible to determine whether or
|
||||||
|
not it can match an empty string. PCRE2 takes a cautious approach and returns 1
|
||||||
|
in such cases.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_MATCHLIMIT
|
PCRE2_INFO_MATCHLIMIT
|
||||||
.sp
|
.sp
|
||||||
|
@ -3084,6 +3087,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 05 November 2015
|
Last updated: 10 November 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -104,18 +104,21 @@ recurse_check this_recurse;
|
||||||
register int branchlength = 0;
|
register int branchlength = 0;
|
||||||
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
register PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
|
||||||
|
|
||||||
|
/* If this is a "could be empty" group, its minimum length is 0. */
|
||||||
|
|
||||||
|
if (*code >= OP_SBRA && *code <= OP_SCOND) return 0;
|
||||||
|
|
||||||
|
/* Skip over capturing bracket number */
|
||||||
|
|
||||||
|
if (*code == OP_CBRA || *code == OP_CBRAPOS) cc += IMM2_SIZE;
|
||||||
|
|
||||||
/* A large and/or complex regex can take too long to process. */
|
/* A large and/or complex regex can take too long to process. */
|
||||||
|
|
||||||
if ((*countptr)++ > 1000) return -1;
|
if ((*countptr)++ > 1000) return -1;
|
||||||
|
|
||||||
/* Skip over capturing bracket number */
|
/* Scan along the opcodes for this branch. If we get to the end of the branch,
|
||||||
|
check the length against that of the other branches. If the accumulated length
|
||||||
if (*code == OP_CBRA || *code == OP_SCBRA ||
|
exceeds 16 bits, stop. */
|
||||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
|
|
||||||
|
|
||||||
/* Scan along the opcodes for this branch. If we get to the end of the
|
|
||||||
branch, check the length against that of the other branches. If the accumulated
|
|
||||||
length passes 16-bits, stop and return it. */
|
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
@ -1543,23 +1546,27 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||||
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
|
if (rc == SSB_DONE) re->flags |= PCRE2_FIRSTMAPSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the minimum length of subject string. */
|
/* Find the minimum length of the subject string. If the pattern can match an empty string,
|
||||||
|
the minimum length is already known. */
|
||||||
|
|
||||||
switch(min = find_minlength(re, code, code, utf, NULL, &count))
|
if ((re->flags & PCRE2_MATCH_EMPTY) == 0)
|
||||||
{
|
{
|
||||||
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
switch(min = find_minlength(re, code, code, utf, NULL, &count))
|
||||||
break; /* Leave minlength unchanged (will be zero) */
|
{
|
||||||
|
case -1: /* \C in UTF mode or (*ACCEPT) or over-complex regex */
|
||||||
|
break; /* Leave minlength unchanged (will be zero) */
|
||||||
|
|
||||||
case -2:
|
case -2:
|
||||||
return 2; /* missing capturing bracket */
|
return 2; /* missing capturing bracket */
|
||||||
|
|
||||||
case -3:
|
case -3:
|
||||||
return 3; /* unrecognized opcode */
|
return 3; /* unrecognized opcode */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (min > UINT16_MAX) min = UINT16_MAX;
|
if (min > UINT16_MAX) min = UINT16_MAX;
|
||||||
re->minlength = min;
|
re->minlength = min;
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -252,7 +252,7 @@ Failed: error -52: nested recursion at the same subject position
|
||||||
/(a|(?R))/I
|
/(a|(?R))/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 0
|
||||||
abcd
|
abcd
|
||||||
0: a
|
0: a
|
||||||
1: a
|
1: a
|
||||||
|
@ -262,7 +262,7 @@ Failed: error -52: nested recursion at the same subject position
|
||||||
/(ab|(bc|(de|(?R))))/I
|
/(ab|(bc|(de|(?R))))/I
|
||||||
Capturing subpattern count = 3
|
Capturing subpattern count = 3
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 0
|
||||||
abcd
|
abcd
|
||||||
0: ab
|
0: ab
|
||||||
1: ab
|
1: ab
|
||||||
|
@ -272,7 +272,7 @@ Failed: error -52: nested recursion at the same subject position
|
||||||
/(ab|(bc|(de|(?1))))/I
|
/(ab|(bc|(de|(?1))))/I
|
||||||
Capturing subpattern count = 3
|
Capturing subpattern count = 3
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 0
|
||||||
abcd
|
abcd
|
||||||
0: ab
|
0: ab
|
||||||
1: ab
|
1: ab
|
||||||
|
|
|
@ -416,7 +416,7 @@ Failed: error -46: JIT stack limit reached
|
||||||
/(a|(?R))/I
|
/(a|(?R))/I
|
||||||
Capturing subpattern count = 1
|
Capturing subpattern count = 1
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 1
|
Subject length lower bound = 0
|
||||||
JIT compilation was successful
|
JIT compilation was successful
|
||||||
abcd
|
abcd
|
||||||
0: a (JIT)
|
0: a (JIT)
|
||||||
|
@ -427,7 +427,7 @@ Failed: error -46: JIT stack limit reached
|
||||||
/(ab|(bc|(de|(?R))))/I
|
/(ab|(bc|(de|(?R))))/I
|
||||||
Capturing subpattern count = 3
|
Capturing subpattern count = 3
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 0
|
||||||
JIT compilation was successful
|
JIT compilation was successful
|
||||||
abcd
|
abcd
|
||||||
0: ab (JIT)
|
0: ab (JIT)
|
||||||
|
@ -438,7 +438,7 @@ Failed: error -46: JIT stack limit reached
|
||||||
/(ab|(bc|(de|(?1))))/I
|
/(ab|(bc|(de|(?1))))/I
|
||||||
Capturing subpattern count = 3
|
Capturing subpattern count = 3
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 0
|
||||||
JIT compilation was successful
|
JIT compilation was successful
|
||||||
abcd
|
abcd
|
||||||
0: ab (JIT)
|
0: ab (JIT)
|
||||||
|
|
|
@ -3960,7 +3960,7 @@ Subject length lower bound = 3
|
||||||
Capturing subpattern count = 2
|
Capturing subpattern count = 2
|
||||||
Compile options: <none>
|
Compile options: <none>
|
||||||
Overall options: anchored
|
Overall options: anchored
|
||||||
Subject length lower bound = 3
|
Subject length lower bound = 2
|
||||||
a=a
|
a=a
|
||||||
0: a=a
|
0: a=a
|
||||||
1: a
|
1: a
|
||||||
|
|
Loading…
Reference in New Issue