From a4ceadbf47beeae8bb34e9cb001cbfba925d007b Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 24 Mar 2015 10:21:34 +0000 Subject: [PATCH] Fix bugs when (?!) is used as a condition. --- ChangeLog | 17 +++++++++++++---- HACKING | 7 ++++++- src/pcre2_compile.c | 3 ++- src/pcre2_dfa_match.c | 9 +++++---- src/pcre2_match.c | 1 + testdata/testinput2 | 7 +++++++ testdata/testinput6 | 4 ++++ testdata/testoutput2 | 10 ++++++++++ testdata/testoutput6 | 6 ++++++ 9 files changed, 54 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index a7d18a1..af537ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -14,8 +14,16 @@ error. This bug was discovered by the LLVM fuzzer. 4. Implemented pcre2_callout_enumerate(). -5. Fix JIT compilation of conditional blocks whose assertion - is converted to (*FAIL). E.g: /(?(?!))/. +5. Fix JIT compilation of conditional blocks whose assertion is converted to +(*FAIL). E.g: /(?(?!))/. + +6. The pattern /(?(?!)^)/ caused references to random memory. This bug was +discovered by the LLVM fuzzer. + +7. The assertion (?!) is optimized to (*FAIL). This was not handled correctly +when this assertion was used as a condition, for example (?(?!)a|b). In +pcre2_match() it worked by luck; in pcre2_dfa_match() it gave an incorrect +error about an unsupported item. Version 10.10 06-March-2015 @@ -120,12 +128,13 @@ repeated outer group that has a zero minimum quantifier, caused incorrect code to be compiled, leading to the error "internal error: previously-checked referenced subpattern not found" when an incorrect memory address was read. This bug was reported as "heap overflow", discovered by Kai Lu of Fortinet's -FortiGuard Labs. +FortiGuard Labs. (Added 24-March-2015: CVE-2015-2325 was given to this.) 23. A pattern such as "((?+1)(\1))/" containing a forward reference subroutine call within a group that also contained a recursive back reference caused incorrect code to be compiled. This bug was reported as "heap overflow", -discovered by Kai Lu of Fortinet's FortiGuard Labs. +discovered by Kai Lu of Fortinet's FortiGuard Labs. (Added 24-March-2015: +CVE-2015-2326 was given to this.) 24. Computing the size of the JIT read-only data in advance has been a source of various issues, and new ones are still appear unfortunately. To fix diff --git a/HACKING b/HACKING index 1e26d01..21edebc 100644 --- a/HACKING +++ b/HACKING @@ -210,7 +210,8 @@ These items are all just one unit long OP_THEN ) OP_ASSERT_ACCEPT is used when (*ACCEPT) is encountered within an assertion. -This ends the assertion, not the entire pattern match. +This ends the assertion, not the entire pattern match. The assertion (?!) is +always optimized to OP_FAIL. Backtracking control verbs with optional data @@ -528,6 +529,10 @@ immediately before the assertion. It is also possible to insert a manual callout at this point. Only assertion conditions may have callouts preceding the condition. +A condition that is the negative assertion (?!) is optimized to OP_FAIL in all +parts of the pattern, so this is another opcode that may appear as a condition. +It is treated the same as OP_FALSE. + Recursion --------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 2d958b1..dee00c0 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -7284,7 +7284,7 @@ inside atomic brackets or in a pattern that contains *PRUNE or *SKIP does not count, because once again the assumption no longer holds. Arguments: - code points to start of the compiled pattern + code points to start of the compiled pattern or a group bracket_map a bitmap of which brackets we are inside while testing; this handles up to substring 31; after that we just have to take the less precise approach @@ -7321,6 +7321,7 @@ do { case OP_DNCREF: case OP_RREF: case OP_DNRREF: + case OP_FAIL: case OP_FALSE: case OP_TRUE: return FALSE; diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index d6b29e8..8f6ed62 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -2660,14 +2660,15 @@ for (;;) condcode == OP_DNRREF) return PCRE2_ERROR_DFA_UCOND; - /* The DEFINE condition is always false */ - - if (condcode == OP_FALSE) + /* The DEFINE condition is always false, and the assertion (?!) is + converted to OP_FAIL. */ + + if (condcode == OP_FALSE || condcode == OP_FAIL) { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } /* There is also an always-true condition */ - if (condcode == OP_TRUE) + else if (condcode == OP_TRUE) { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); } /* The only supported version of OP_RREF is for the value RREF_ANY, diff --git a/src/pcre2_match.c b/src/pcre2_match.c index acc695a..231a8ff 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -1408,6 +1408,7 @@ for (;;) break; case OP_FALSE: + case OP_FAIL: /* The assertion (?!) becomes OP_FAIL */ break; case OP_TRUE: diff --git a/testdata/testinput2 b/testdata/testinput2 index 9f2be9e..7c52e2b 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4229,4 +4229,11 @@ a random value. /Ix / 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info abcdefgh +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb + ** Failers + aaa + # End of testinput2 diff --git a/testdata/testinput6 b/testdata/testinput6 index 65796ca..de31b53 100644 --- a/testdata/testinput6 +++ b/testdata/testinput6 @@ -4846,4 +4846,8 @@ / 61 28 3f 43 27 78 00 7a 27 29 62/hex abcdefgh +/(?(?!)a|b)/ + bbb + aaa + # End of testinput6 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index e1544f8..5761e9f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14188,4 +14188,14 @@ Callout (5): 'x\x00z' ^^ b 0: ab +/(?(?!)^)/ + +/(?(?!)a|b)/ + bbb + 0: b + ** Failers +No match + aaa +No match + # End of testinput2 diff --git a/testdata/testoutput6 b/testdata/testoutput6 index d9244bd..3e33562 100644 --- a/testdata/testoutput6 +++ b/testdata/testoutput6 @@ -7919,4 +7919,10 @@ Callout (5): 'x\x00z' ^^ b 0: ab +/(?(?!)a|b)/ + bbb + 0: b + aaa +No match + # End of testinput6