Ignore {1} quantifiers.

This commit is contained in:
Philip.Hazel 2015-07-17 13:41:09 +00:00
parent 5cd731f314
commit 54f59d3c05
4 changed files with 45 additions and 17 deletions

View File

@ -37,6 +37,9 @@ overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer.
9. The handling of callouts during the pre-pass for named group identification
has been tightened up.

10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
possessive. This is a very minor optimization.

Version 10.20 30-June-2015
--------------------------

View File

@ -3253,11 +3253,11 @@ for (; ptr < cb->end_pattern; ptr++)
else top_nest->nest_depth = nest_depth; else top_nest->nest_depth = nest_depth;
} }
break; break;
/* Skip over a numerical or string argument for a callout. */ /* Skip over a numerical or string argument for a callout. */
case CHAR_C: case CHAR_C:
ptr += 2; ptr += 2;
if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break; if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
if (IS_DIGIT(ptr[1])) if (IS_DIGIT(ptr[1]))
{ {
@ -3265,14 +3265,14 @@ for (; ptr < cb->end_pattern; ptr++)
if (ptr[1] != CHAR_RIGHT_PARENTHESIS) if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
{ {
errorcode = ERR39; errorcode = ERR39;
ptr++; ptr++;
goto FAILED; goto FAILED;
} }
break; break;
} }
/* Handle a string argument */ /* Handle a string argument */
ptr++; ptr++;
delimiter = 0; delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++) for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
@ -3302,8 +3302,8 @@ for (; ptr < cb->end_pattern; ptr++)
if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2; if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
} }
while (ptr[0] != delimiter); while (ptr[0] != delimiter);
break; break;
case CHAR_NUMBER_SIGN: case CHAR_NUMBER_SIGN:
ptr += 3; ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
@ -4719,6 +4719,10 @@ for (;; ptr++)
} }
else repeat_type = greedy_default; else repeat_type = greedy_default;
/* If the repeat is {1} we can ignore it. */
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
/* If previous was a recursion call, wrap it in atomic brackets so that /* If previous was a recursion call, wrap it in atomic brackets so that
previous becomes the atomic group. All recursions were so wrapped in the previous becomes the atomic group. All recursions were so wrapped in the
past, but it no longer happens for non-repeated recursions. In fact, the past, but it no longer happens for non-repeated recursions. In fact, the
@ -6113,8 +6117,8 @@ for (;; ptr++)
} }
/* During the pre-compile phase, we parse the string and update the /* During the pre-compile phase, we parse the string and update the
length. There is no need to generate any code. (In fact, the string length. There is no need to generate any code. (In fact, the string
has already been parsed in the pre-pass that looks for named has already been parsed in the pre-pass that looks for named
parentheses, but it does no harm to leave this code in.) */ parentheses, but it does no harm to leave this code in.) */
if (lengthptr != NULL) /* Only check the string */ if (lengthptr != NULL) /* Only check the string */

6
testdata/testinput2 vendored
View File

@ -1259,7 +1259,11 @@
/(a(b(?2)c)){0,2}/IB /(a(b(?2)c)){0,2}/IB
/[ab]{1}+/IB /[ab]{1}+/B
/()(?1){1}/B
/()(?1)/B
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Baby Bjorn Active Carrier - With free SHIPPING!! Baby Bjorn Active Carrier - With free SHIPPING!!

27
testdata/testoutput2 vendored
View File

@ -4533,16 +4533,33 @@ Capturing subpattern count = 2
May match empty string May match empty string
Subject length lower bound = 0 Subject length lower bound = 0
/[ab]{1}+/IB /[ab]{1}+/B
------------------------------------------------------------------ ------------------------------------------------------------------
Bra Bra
[ab]{1,1}+ [ab]
Ket
End
------------------------------------------------------------------
/()(?1){1}/B
------------------------------------------------------------------
Bra
CBra 1
Ket
Recurse
Ket
End
------------------------------------------------------------------
/()(?1)/B
------------------------------------------------------------------
Bra
CBra 1
Ket
Recurse
Ket Ket
End End
------------------------------------------------------------------ ------------------------------------------------------------------
Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii /((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Capturing subpattern count = 3 Capturing subpattern count = 3