From 666e94cd59d5889d7ba675569fce8b4e5af8a3e8 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Mon, 16 Jul 2018 15:24:32 +0000 Subject: [PATCH] Fixed atomic group backtracking bug. --- ChangeLog | 6 ++++++ src/pcre2_match.c | 2 +- testdata/testinput1 | 6 ++++++ testdata/testoutput1 | 8 ++++++++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a068002..c07ba56 100644 --- a/ChangeLog +++ b/ChangeLog @@ -111,6 +111,12 @@ such as the 04 in 10.04 incorrectly and hence giving wrong results. 24. Updated to Unicode version 11.0.0. As well as the usual addition of new scripts and characters, this involved re-jigging the grapheme break property algorithm because Unicode has changed the way emojis are handled. + +25. Fixed an obscure bug that struck when there were two atomic groups not +separated by something with a backtracking point. There could be an incorrect +backtrack into the first of the atomic groups. A complicated example is +/(?>a(*:1))(?>b)(*SKIP:1)x|.*/ matched against "abc", where the *SKIP +shouldn't find a MARK (because it is in an atomic group), but it did. Version 10.31 12-February-2018 diff --git a/src/pcre2_match.c b/src/pcre2_match.c index 65d48e4..ed5c996 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -5509,7 +5509,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); frame so that it points to the final branch. 
*/ case OP_ONCE: - Fback_frame = ((char *)F - (char *)P) + frame_size; + Fback_frame = ((char *)F - (char *)P); for (;;) { uint32_t y = GET(P->ecode,1); diff --git a/testdata/testinput1 b/testdata/testinput1 index cabc6ef..82b7f46 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -6203,4 +6203,10 @@ ef) x/x,mark /a(?:(*:X))(*SKIP:X)(*F)|(.)/ abc +/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize + abc + +/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize + abc + # End of testinput1 diff --git a/testdata/testoutput1 b/testdata/testoutput1 index a210038..8a3be73 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -9846,4 +9846,12 @@ No match 0: b 1: b +/(?>a(*:1))(?>b(*:1))(*SKIP:1)x|.*/no_start_optimize + abc + 0: abc + +/(?>a(*:1))(?>b)(*SKIP:1)x|.*/no_start_optimize + abc + 0: abc + # End of testinput1