From 5923caf05eff3bee6c1ac03d4fd61770d72a2dc0 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 9 Oct 2015 16:54:29 +0000 Subject: [PATCH] Fix integer overflow for patterns whose minimum matching length is very, very large. --- ChangeLog | 3 +++ src/pcre2_study.c | 12 +++++++++--- testdata/testinput2 | 2 ++ testdata/testoutput2 | 8 ++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8cd9fb3..64614a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -198,6 +198,9 @@ and the matching functions with NULL contexts can be tested. ("not a word character") and a property escape were present, the property escape was being ignored. +57. Fixed integer overflow for patterns whose minimum matching length is very, +very large. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_study.c b/src/pcre2_study.c index c2b0083..4fae517 100644 --- a/src/pcre2_study.c +++ b/src/pcre2_study.c @@ -65,8 +65,11 @@ enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; /* Scan a parenthesized group and compute the minimum length of subject that is needed to match it. This is a lower bound; it does not mean there is a -string of that length that matches. In UTF8 mode, the result is in characters -rather than bytes. +string of that length that matches. In UTF mode, the result is in characters +rather than code units. The field in a compiled pattern for storing the minimum +length is 16-bits long (on the grounds that anything longer than that is +pathological), so we give up when we reach that amount. This also means that +integer overflow for really crazy patterns cannot happen. Arguments: re compiled pattern block @@ -111,7 +114,8 @@ if (*code == OP_CBRA || *code == OP_SCBRA || *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE; /* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ +branch, check the length against that of the other branches. If the accumulated +length passes 16-bits, stop and return it. */ for (;;) { @@ -119,6 +123,8 @@ for (;;) PCRE2_UCHAR *cs, *ce; register PCRE2_UCHAR op = *cc; + if (branchlength > UINT16_MAX) return branchlength; + switch (op) { case OP_COND: diff --git a/testdata/testinput2 b/testdata/testinput2 index 7dffc99..33f370f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4590,4 +4590,6 @@ B)x/alt_verbnames,mark /(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1 aaBB +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index b854798..71b5189 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14741,4 +14741,12 @@ Failed: error -55 at offset 3 in replacement: requested value is not set aaBB 1: AAbbaa..AAbBaa +/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I +Capturing subpattern count = 2 +Max back reference = 1 +Compile options: +Overall options: anchored +Last code unit = '}' +Subject length lower bound = 65535 + # End of testinput2