From 45b219e6bc5c5b7670521645d823b9439ada6a39 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Mon, 26 Aug 2019 16:28:26 +0000 Subject: [PATCH] Fix bug introduced in commit 1133. Lookbehinds that follow a condition were not always properly handled. --- src/pcre2_compile.c | 26 ++++++++----- testdata/testinput1 | 3 ++ testdata/testinput2 | 10 +++++ testdata/testoutput1 | 6 +++ testdata/testoutput2 | 93 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 129 insertions(+), 9 deletions(-) diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index d5aa1af..1f28a2e 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -9465,14 +9465,14 @@ non-nested closing parenthesis in this case, returning a pointer to it. Arguments pptr points to where to start (start of pattern or start of lookahead) retptr if not NULL, return the ket pointer here - recurses chain of recurse_check to catch mutual recursion + recurses chain of recurse_check to catch mutual recursion cb points to the compile block Returns: 0 on success, or an errorcode (cb->erroroffset will be set) */ static int -check_lookbehinds(uint32_t *pptr, uint32_t **retptr, +check_lookbehinds(uint32_t *pptr, uint32_t **retptr, parsed_recurse_check *recurses, compile_block *cb) { int max; @@ -9549,13 +9549,22 @@ for (; *pptr != META_END; pptr++) break; case META_BACKREF_BYNAME: + case META_RECURSE_BYNAME: + pptr += 1 + SIZEOFFSET; + break; + case META_COND_DEFINE: case META_COND_NAME: case META_COND_NUMBER: case META_COND_RNAME: case META_COND_RNUMBER: - case META_RECURSE_BYNAME: pptr += 1 + SIZEOFFSET; + nestlevel++; + break; + + case META_COND_VERSION: + pptr += 3; + nestlevel++; break; case META_CALLOUT_STRING: @@ -9576,7 +9585,6 @@ for (; *pptr != META_END; pptr++) break; case META_CALLOUT_NUMBER: - case META_COND_VERSION: pptr += 3; break; @@ -9591,7 +9599,7 @@ for (; *pptr != META_END; pptr++) case META_LOOKBEHIND: case META_LOOKBEHINDNOT: case META_LOOKBEHIND_NA: - if 
(!set_lookbehind_lengths(&pptr, &max, &errorcode, &loopcount, + if (!set_lookbehind_lengths(&pptr, &max, &errorcode, &loopcount, recurses, cb)) return errorcode; break; @@ -10421,12 +10429,12 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) errorcode = ERR31; goto HAD_CB_ERROR; } - - /* If study() set a bitmap of starting code units, it implies a minimum + + /* If study() set a bitmap of starting code units, it implies a minimum length of at least one. */ - + if ((re->flags & PCRE2_FIRSTMAPSET) != 0 && minminlength == 0) - minminlength = 1; + minminlength = 1; /* If the minimum length set (or not set) by study() is less than the minimum implied by required code units, override it. */ diff --git a/testdata/testinput1 b/testdata/testinput1 index 5d60cd9..f5159d6 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6383,4 +6383,7 @@ ef) x/x,mark /(?<=(?=(?<=a)))b/ ab +/^(?<A>a)(?(<A>)b)((?<=b).*)$/ + abc + # End of testinput1 diff --git a/testdata/testinput2 b/testdata/testinput2 index 7b44fb0..178d5eb 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5748,4 +5748,14 @@ a)"xI /\A\s*((?:[^`]{28500}){4}|a)/I a +/(?<A>a)(?(<A>)b)((?<=b).*)/B + +/(?(1)b)((?<=b).*)/B + +/(?(R1)b)((?<=b).*)/B + +/(?(DEFINE)b)((?<=b).*)/B + +/(?(VERSION=10.4)b)((?<=b).*)/B + # End of testinput2 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 787c7e7..ad2175b 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -10106,4 +10106,10 @@ No match ab 0: b +/^(?<A>a)(?(<A>)b)((?<=b).*)$/ + abc + 0: abc + 1: a + 2: c + # End of testinput1 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 0803d9e..2c69e54 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17294,6 +17294,99 @@ Subject length lower bound = 1 0: a 1: a +/(?<A>a)(?(<A>)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + CBra 1 + a + Ket + Cond + 1 Cond ref + b + Ket + CBra 2 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End 
+------------------------------------------------------------------ + +/(?(1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + 1 Cond ref + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(R1)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond recurse 1 + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(DEFINE)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + +/(?(VERSION=10.4)b)((?<=b).*)/B +------------------------------------------------------------------ + Bra + Cond + Cond false + b + Ket + CBra 1 + Assert back + Reverse + b + Ket + Any*+ + Ket + Ket + End +------------------------------------------------------------------ + # End of testinput2 Error -70: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data