From d961132ae1d61ab2e7e3fcf3255adee8ac4e4bca Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Fri, 10 Oct 2014 16:42:03 +0000 Subject: [PATCH] Fix match_unset_backref bug. --- src/pcre2_match.c | 17 +++++++++-------- testdata/testinput2 | 3 +++ testdata/testoutput2 | 6 ++++++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 272fc89..64c8379 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -2763,19 +2763,20 @@ for (;;) continue; /* With the main loop */ } - /* Handle repeated back references. If a set group has length zero, just - continue with the main loop, because it matches however many times. For an - unset reference, in non-match-unset-backref mode, if the minimum is - zero, we can continue at the same level without recursion. For any other - minimum, carrying on will result in NOMATCH. */ + /* Handle repeated back references. If a set group has length zero, just + continue with the main loop, because it matches however many times. For an + unset reference, if the minimum is zero, we can also just continue. We can + also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset + group behave as a zero-length group. For any other unset cases, carrying + on will result in NOMATCH. */ if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) { if (mb->ovector[offset] == mb->ovector[offset + 1]) continue; } - else + else /* Group is not set */ { - if (min == 0 && (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) == 0) + if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) continue; } @@ -2856,7 +2857,7 @@ for (;;) eptr += slength; } - /* If the length matched for each repetiaion is the same as the length of + /* If the length matched for each repetition is the same as the length of the captured group, we can easily work backwards. This is the normal case. 
However, in caseless UTF-8 mode there are pairs of case-equivalent characters whose lengths (in terms of code units) differ. However, this diff --git a/testdata/testinput2 b/testdata/testinput2 index 4af89b2..548224f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4003,4 +4003,7 @@ a random value. /Ix abd xyd +/\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames + aabb + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index e086358..09c8a74 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -13576,4 +13576,10 @@ Subject length lower bound = 0 xyd 0: d +/\k<A>*(?<A>aa)(?<A>bb)/match_unset_backref,dupnames + aabb + 0: aabb + 1: aa + 2: bb + # End of testinput2