From 3a6b4948d1bafe1e9f8aa66fa788e9971186d6e6 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 26 Jan 2020 15:31:27 +0000 Subject: [PATCH] Fix bug in processing (?(DEFINE)...) within lookbehind assertions. --- ChangeLog | 8 ++++++++ src/pcre2_compile.c | 20 ++++++++++++++------ testdata/testinput1 | 13 +++++++++++++ testdata/testinput2 | 4 ++++ testdata/testoutput1 | 17 +++++++++++++++++ testdata/testoutput2 | 5 +++++ 6 files changed, 61 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index ffe653f..cab65e3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -48,6 +48,14 @@ the minimum. 13. In pcre2grep, if the final line in a scanned file is output but does not end with a newline sequence, add a newline according to the --newline setting. +14. (?(DEFINE)...) groups were not being handled correctly when checking for +the fixed length of a lookbehind assertion. Such a group within a lookbehind +should be skipped, as it does not contribute to the length of the group. +Instead, the (DEFINE) group was being processed, and if it was at the end of +the lookbehind, that end was not correctly recognized. Errors such as +"lookbehind assertion is not fixed length" and also "internal error: bad code +value in parsed_skip()" could result. + Version 10.34 21-November-2019 ------------------------------ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 0350328..aa4869f 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -8800,9 +8800,10 @@ memset(slot + IMM2_SIZE + length, 0, /* This function is called to skip parts of the parsed pattern when finding the length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find -the end of the branch, it is called to skip over an internal lookaround, and it -is also called to skip to the end of a class, during which it will never -encounter nested groups (but there's no need to have special code for that).
+the end of the branch, it is called to skip over an internal lookaround or +(DEFINE) group, and it is also called to skip to the end of a class, during +which it will never encounter nested groups (but there's no need to have +special code for that). When called to find the end of a branch or group, pptr must point to the first meta code inside the branch, not the branch-starting code. In other cases it @@ -9280,14 +9281,21 @@ for (;; pptr++) itemlength = grouplength; break; - /* Check nested groups - advance past the initial data for each type and - then seek a fixed length with get_grouplength(). */ + /* A (DEFINE) group is never obeyed inline and so it does not contribute to + the length of this branch. Skip from the following item to the next + unpaired ket. */ + + case META_COND_DEFINE: + pptr = parsed_skip(pptr + 1, PSKIP_KET); + break; + + /* Check other nested groups - advance past the initial data for each type + and then seek a fixed length with get_grouplength(). */ case META_COND_NAME: case META_COND_NUMBER: case META_COND_RNAME: case META_COND_RNUMBER: - case META_COND_DEFINE: pptr += 2 + SIZEOFFSET; goto CHECK_GROUP; diff --git a/testdata/testinput1 b/testdata/testinput1 index a002b78..c6b3647 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6411,4 +6411,17 @@ ef) x/x,mark Hackdaws love my big sphinx of quartz. Pack my fox with five dozen liquor jugs. +"(?<=X(?(DEFINE)(A)))X(*F)" +\= Expect no match + AXYZ + +"(?<=X(?(DEFINE)(A)))." + AXYZ + +"(?<=X(?(DEFINE)(.*))Y)." + AXYZ + +"(?<=X(?(DEFINE)(Y))(?1))." + AXYZ + # End of testinput1 diff --git a/testdata/testinput2 b/testdata/testinput2 index b5c5188..cac6afa 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5806,4 +5806,8 @@ a)"xI 12abc34xyz99abc55\=substitute_skip=1 12abc34xyz99abc55\=substitute_skip=2 +# Expect non-fixed-length error + +"(?<=X(?(DEFINE)(.*))(?1))." 
+ # End of testinput2 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index f196d56..4c43caa 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -10165,4 +10165,21 @@ No match Pack my fox with five dozen liquor jugs. No match +"(?<=X(?(DEFINE)(A)))X(*F)" +\= Expect no match + AXYZ +No match + +"(?<=X(?(DEFINE)(A)))." + AXYZ + 0: Y + +"(?<=X(?(DEFINE)(.*))Y)." + AXYZ + 0: Z + +"(?<=X(?(DEFINE)(Y))(?1))." + AXYZ + 0: Z + # End of testinput1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index f066c78..bf272e1 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17536,6 +17536,11 @@ Callout 0: last capture = 2 3(2) Old 12 15 "abc" New 5 10 "" 3: +# Expect non-fixed-length error + +"(?<=X(?(DEFINE)(.*))(?1))." +Failed: error 125 at offset 0: lookbehind assertion is not fixed length + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data