From 0757041114d2018b027b3a0d0182bff7aeb650f2 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Tue, 4 Apr 2017 11:37:21 +0000 Subject: [PATCH] Fix non-passing of mark values out of recursions. --- ChangeLog | 2 ++ src/pcre2_match.c | 23 ++++++++++------------- testdata/testinput2 | 5 +++++ testdata/testoutput2 | 7 +++++++ 4 files changed, 24 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 36a5478..2f4b1a0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -34,6 +34,8 @@ bugs were never in fully released code, but are noted here for the record. (d) Captures in negative assertions that were used as conditions were not happening if the assertion matched via (*ACCEPT). + (e) Mark values were not being passed out of recursions. + 2. Now that pcre2_match() no longer uses recursive function calls (see above), the "match limit recursion" value seems misnamed. It still exists, and limits the depth of tree that is searched. To avoid future confusion, it has been diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 1c35c02..69110dc 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -725,7 +725,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); where we know the starting frame is at the top of the chained frames, in this case we have to search back for the relevant frame in case other types of group that use chained frames have intervened. Multiple OP_CLOSEs always - come innermost first, which matches the chain order. */ + come innermost first, which matches the chain order. We can ignore this in + a recursion, because captures are not passed out of recursions. */ case OP_CLOSE: if (Fcurrent_recurse == RECURSE_UNSET) @@ -746,23 +747,21 @@ fprintf(stderr, "++ op=%d\n", *Fecode); Fovector[offset+1] = Feptr - mb->start_subject; if (offset >= Foffset_top) Foffset_top = offset + 2; } - Fecode += PRIV(OP_lengths)[*Fecode]; break; /* ===================================================================== */ - /* End of the pattern, either real or forced. In an assertion ACCEPT, - update the last used pointer and remember the current frame so that the - captures can be fished out of it. */ + /* Real or forced end of the pattern, assertion, or recursion. In an + assertion ACCEPT, update the last used pointer and remember the current + frame so that the captures can be fished out of it. */ case OP_ASSERT_ACCEPT: if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; assert_accept_frame = F; RRETURN(MATCH_ACCEPT); - /* The real end, or top-level (*ACCEPT). If recursing, we have to find the - most recent recursion. */ + /* If recursing, we have to find the most recent recursion. */ case OP_ACCEPT: case OP_END: @@ -782,10 +781,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* N is now the frame of the recursion; the previous frame is at the - OP_RECURSE position. Go back there, copying the current subject position, - and move on past the OP_RECURSE. */ + OP_RECURSE position. Go back there, copying the current subject position + and mark, and move on past the OP_RECURSE. */ P->eptr = Feptr; + P->mark = Fmark; F = P; Fecode += 1 + LINK_SIZE; continue; @@ -5078,10 +5078,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } } - /* Now run the recursion. If it successfully completes, it re-instates the - previous values of the captures and continues, just like a non-capturing - bracket. We must leave Fecode unchanged so that the ending code can find - out where to continue. */ + /* Now run the recursion, branch by branch. */ Lstart_branch = bracode; Lframe_type = GF_RECURSE | number; diff --git a/testdata/testinput2 b/testdata/testinput2 index eb9c556..9b7788b 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5005,5 +5005,10 @@ a)"xI /^ (?(DEFINE) (..(*ACCEPT)|...) ) (?1)$/x \= Expect no match abc + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 668ae4f..5a6c94f 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15529,6 +15529,13 @@ Callout 1: last capture = 1 \= Expect no match abc No match + +# Perl gives no match for this one + +/(a(*MARK:m)(*ACCEPT)){0}(?1)/mark + abc + 0: a +MK: m # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number)