Fix integer overflow for patterns whose minimum matching length is very, very large.
This commit is contained in:
Philip.Hazel 2015-10-09 16:54:29 +00:00
parent 093412143d
commit 5923caf05e
4 changed files with 22 additions and 3 deletions

View File

@ -198,6 +198,9 @@ and the matching functions with NULL contexts can be tested.
("not a word character") and a property escape were present, the property ("not a word character") and a property escape were present, the property
escape was being ignored. escape was being ignored.
57. Fixed integer overflow for patterns whose minimum matching length is very,
very large.
Version 10.20 30-June-2015 Version 10.20 30-June-2015
-------------------------- --------------------------

View File

@ -65,8 +65,11 @@ enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
/* Scan a parenthesized group and compute the minimum length of subject that /* Scan a parenthesized group and compute the minimum length of subject that
is needed to match it. This is a lower bound; it does not mean there is a is needed to match it. This is a lower bound; it does not mean there is a
string of that length that matches. In UTF8 mode, the result is in characters string of that length that matches. In UTF mode, the result is in characters
rather than bytes. rather than code units. The field in a compiled pattern for storing the minimum
length is 16 bits long (on the grounds that anything longer than that is
pathological), so we give up when we reach that amount. This also means that
integer overflow for really crazy patterns cannot happen.
Arguments: Arguments:
re compiled pattern block re compiled pattern block
@ -111,7 +114,8 @@ if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
/* Scan along the opcodes for this branch. If we get to the end of the /* Scan along the opcodes for this branch. If we get to the end of the
branch, check the length against that of the other branches. */ branch, check the length against that of the other branches. If the accumulated
length exceeds 16 bits, stop and return it. */
for (;;) for (;;)
{ {
@ -119,6 +123,8 @@ for (;;)
PCRE2_UCHAR *cs, *ce; PCRE2_UCHAR *cs, *ce;
register PCRE2_UCHAR op = *cc; register PCRE2_UCHAR op = *cc;
if (branchlength > UINT16_MAX) return branchlength;
switch (op) switch (op)
{ {
case OP_COND: case OP_COND:

2
testdata/testinput2 vendored
View File

@ -4590,4 +4590,6 @@ B)x/alt_verbnames,mark
/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 /(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1
aaBB aaBB
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
# End of testinput2 # End of testinput2

View File

@ -14741,4 +14741,12 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
aaBB aaBB
1: AAbbaa..AAbBaa 1: AAbbaa..AAbBaa
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capturing subpattern count = 2
Max back reference = 1
Compile options: <none>
Overall options: anchored
Last code unit = '}'
Subject length lower bound = 65535
# End of testinput2 # End of testinput2