From 7d7a92edef43b01cde079ad2ec86f4855d8f3ef2 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sun, 1 Nov 2015 17:31:48 +0000 Subject: [PATCH] Bad max lookbehind length was calculated when a back reference contained a recursion. --- ChangeLog | 3 +++ src/pcre2_compile.c | 9 ++++++++- testdata/testinput10 | 3 +++ testdata/testoutput10 | 9 +++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 4156dca..60477bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -252,6 +252,9 @@ beyond the end of the replacement string. lookbehind involved an out-of-bounds pointer, which could potentially cause trouble in some environments. +73. The maximum lookbehind length was incorrectly calculated for patterns such +as /(?<=(a)(?-1))x/ which have a recursion within a backreference. + Version 10.20 30-June-2015 -------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 5a0e817..b6c9c17 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -8378,7 +8378,7 @@ re->first_codeunit = 0; re->last_codeunit = 0; re->bsr_convention = bsr; re->newline_convention = newline; -re->max_lookbehind = +re->max_lookbehind = 0; re->minlength = 0; re->top_bracket = 0; re->top_backref = 0; @@ -8587,6 +8587,13 @@ if (errorcode == 0 && cb.check_lookbehind) } cc += 1 + LINK_SIZE; } + + /* The previous value of the maximum lookbehind was transferred to the + compiled regex block above. We could have updated this value in the loop + above, but keep the two values in step, just in case some later code below + uses the cb value. */ + + re->max_lookbehind = cb.max_lookbehind; } /* Failed to compile, or error while post-processing. Earlier errors get here diff --git a/testdata/testinput10 b/testdata/testinput10 index 620de96..681ba9a 100644 --- a/testdata/testinput10 +++ b/testdata/testinput10 @@ -440,4 +440,7 @@ /abc/utf,replace=Ã abc +/(?<=(a)(?-1))x/I,utf + a\x80zx\=offset=3 + # End of testinput10 diff --git a/testdata/testoutput10 b/testdata/testoutput10 index 65cd081..a93070f 100644 --- a/testdata/testoutput10 +++ b/testdata/testoutput10 @@ -1527,4 +1527,13 @@ Subject length lower bound = 2 abc Failed: error -3: UTF-8 error: 1 byte missing at end +/(?<=(a)(?-1))x/I,utf +Capturing subpattern count = 1 +Max lookbehind = 2 +Options: utf +First code unit = 'x' +Subject length lower bound = 1 + a\x80zx\=offset=3 +Failed: error -22: UTF-8 error: isolated 0x80 byte at offset 1 + # End of testinput10