Ignore {1} quantifiers.

This commit is contained in:
Philip.Hazel 2015-07-17 13:41:09 +00:00
parent 5cd731f314
commit 54f59d3c05
4 changed files with 45 additions and 17 deletions

View File

@@ -37,6 +37,9 @@ overflow. This bug was discovered by Karl Skomski with the LLVM fuzzer.
9. The handling of callouts during the pre-pass for named group identification
has been tightened up.
10. The quantifier {1} can be ignored, whether greedy, non-greedy, or
possessive. This is a very minor optimization.
Version 10.20 30-June-2015
--------------------------

View File

@@ -3253,11 +3253,11 @@ for (; ptr < cb->end_pattern; ptr++)
else top_nest->nest_depth = nest_depth;
}
break;
/* Skip over a numerical or string argument for a callout. */
case CHAR_C:
ptr += 2;
ptr += 2;
if (ptr[1] == CHAR_RIGHT_PARENTHESIS) break;
if (IS_DIGIT(ptr[1]))
{
@@ -3265,14 +3265,14 @@ for (; ptr < cb->end_pattern; ptr++)
if (ptr[1] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR39;
ptr++;
ptr++;
goto FAILED;
}
}
break;
}
}
/* Handle a string argument */
ptr++;
delimiter = 0;
for (i = 0; PRIV(callout_start_delims)[i] != 0; i++)
@@ -3302,8 +3302,8 @@ for (; ptr < cb->end_pattern; ptr++)
if (ptr[0] == delimiter && ptr[1] == delimiter) ptr += 2;
}
while (ptr[0] != delimiter);
break;
break;
case CHAR_NUMBER_SIGN:
ptr += 3;
while (ptr < cb->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
@@ -4719,6 +4719,10 @@ for (;; ptr++)
}
else repeat_type = greedy_default;
/* If the repeat is {1} we can ignore it. */
if (repeat_max == 1 && repeat_min == 1) goto END_REPEAT;
/* If previous was a recursion call, wrap it in atomic brackets so that
previous becomes the atomic group. All recursions were so wrapped in the
past, but it no longer happens for non-repeated recursions. In fact, the
@@ -6113,8 +6117,8 @@ for (;; ptr++)
}
/* During the pre-compile phase, we parse the string and update the
length. There is no need to generate any code. (In fact, the string
has already been parsed in the pre-pass that looks for named
length. There is no need to generate any code. (In fact, the string
has already been parsed in the pre-pass that looks for named
parentheses, but it does no harm to leave this code in.) */
if (lengthptr != NULL) /* Only check the string */

6
testdata/testinput2 vendored
View File

@@ -1259,7 +1259,11 @@
/(a(b(?2)c)){0,2}/IB
/[ab]{1}+/IB
/[ab]{1}+/B
/()(?1){1}/B
/()(?1)/B
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Baby Bjorn Active Carrier - With free SHIPPING!!

27
testdata/testoutput2 vendored
View File

@@ -4533,16 +4533,33 @@ Capturing subpattern count = 2
May match empty string
Subject length lower bound = 0
/[ab]{1}+/IB
/[ab]{1}+/B
------------------------------------------------------------------
Bra
[ab]{1,1}+
[ab]
Ket
End
------------------------------------------------------------------
/()(?1){1}/B
------------------------------------------------------------------
Bra
CBra 1
Ket
Recurse
Ket
End
------------------------------------------------------------------
/()(?1)/B
------------------------------------------------------------------
Bra
CBra 1
Ket
Recurse
Ket
End
------------------------------------------------------------------
Capturing subpattern count = 0
Starting code units: a b
Subject length lower bound = 1
/((w\/|-|with)*(free|immediate)*.*?shipping\s*[!.-]*)/Ii
Capturing subpattern count = 3