From 59d85d7b5519cb5dd513d4a77b4a58de7c819f73 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 12 Dec 2017 15:01:51 +0000 Subject: [PATCH] Fix incorrect first matching character when a backreference with zero minimum repeat starts a pattern (possibly after assertions). --- ChangeLog | 5 +++++ src/pcre2_compile.c | 2 +- testdata/testinput2 | 10 ++++++++++ testdata/testoutput2 | 28 ++++++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 40f6761..7af58de 100644 --- a/ChangeLog +++ b/ChangeLog @@ -65,6 +65,11 @@ were all int variables, causing overflow when files with more than 2147483647 lines were processed (assuming 32-bit ints). They have all been changed to unsigned long ints. +17. If a backreference with a minimum repeat count of zero was first in a +pattern, apart from assertions, an incorrect first matching character could be +recorded. For example, for the pattern /(?=(a))\1?b/, "b" was incorrectly set +as the first character of a match. + Version 10.30 14-August-2017 ---------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 0b91d14..ad17338 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -7135,7 +7135,7 @@ for (;; pptr++) later. */ HANDLE_SINGLE_REFERENCE: - if (firstcuflags == REQ_UNSET) firstcuflags = REQ_NONE; + if (firstcuflags == REQ_UNSET) zerofirstcuflags = firstcuflags = REQ_NONE; *code++ = ((options & PCRE2_CASELESS) != 0)? OP_REFI : OP_REF; PUT2INC(code, 0, meta_arg); diff --git a/testdata/testinput2 b/testdata/testinput2 index 022df20..695f0a4 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5375,4 +5375,14 @@ a)"xI /[\d-[:print:]]/ +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I + ab + aaab + +"(?=(a))\1?b"I + ab + aaab + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 2d9e347..31ccfbe 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -16340,6 +16340,34 @@ Failed: error 150 at offset 3: invalid range in character class /[\d-[:print:]]/ Failed: error 150 at offset 3: invalid range in character class +# Perl gets the second of these wrong, giving no match. + +"(?<=(a))\1?b"I +Capturing subpattern count = 1 +Max back reference = 1 +Max lookbehind = 1 +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: b + 1: a + aaab + 0: ab + 1: a + +"(?=(a))\1?b"I +Capturing subpattern count = 1 +Max back reference = 1 +Starting code units: a +Last code unit = 'b' +Subject length lower bound = 1 + ab + 0: ab + 1: a + aaab + 0: ab + 1: a + # End of testinput2 Error -65: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data