From aff77100bb74420a2a7f819eb3a898194c870381 Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Mon, 19 Feb 2018 14:49:42 +0000 Subject: [PATCH] Fix the value passed back for POSIX unset groups when REG_STARTEND has a non-zero starting offset, and make pcre2test show relevant POSIX unset groups. --- ChangeLog | 13 +++++++++++++ src/pcre2posix.c | 10 ++++++---- src/pcre2test.c | 8 ++++++-- testdata/testinput18 | 4 ++++ testdata/testoutput18 | 15 +++++++++++++++ 5 files changed, 44 insertions(+), 6 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7f520bf..d655fdc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,19 @@ Change Log for PCRE2 -------------------- +Version 10.32-RC1 19-February-2018 +---------------------------------- + +1. When matching using the REG_STARTEND feature of the POSIX API with a +non-zero starting offset, unset capturing groups with lower numbers than a +group that did capture something were not being correctly returned as "unset" +(that is, with offset values of -1). + +2. When matching using the POSIX API, pcre2test used to omit listing unset +groups altogether. Now it shows those that come before any actual captures as +"<unset>", as happens for non-POSIX matching. + + Version 10.31 12-February-2018 ------------------------------ diff --git a/src/pcre2posix.c b/src/pcre2posix.c index 026943e..5a2f7cd 100644 --- a/src/pcre2posix.c +++ b/src/pcre2posix.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016 University of Cambridge + New API code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -93,7 +93,7 @@ information; I know nothing about MSVC myself). For example, something like void __cdecl function(....) -might be needed. 
In order so make this easy, all the exported functions have +might be needed. In order to make this easy, all the exported functions have PCRE2_CALL_CONVENTION just before their names. It is rarely needed; if not set, we ensure here that it has no effect. */ @@ -344,8 +344,10 @@ if (rc >= 0) if ((size_t)rc > nmatch) rc = (int)nmatch; for (i = 0; i < (size_t)rc; i++) { - pmatch[i].rm_so = ovector[i*2] + so; - pmatch[i].rm_eo = ovector[i*2+1] + so; + pmatch[i].rm_so = (ovector[i*2] == PCRE2_UNSET)? -1 : + (int)(ovector[i*2] + so); + pmatch[i].rm_eo = (ovector[i*2+1] == PCRE2_UNSET)? -1 : + (int)(ovector[i*2+1] + so); } for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1; return 0; diff --git a/src/pcre2test.c b/src/pcre2test.c index 15bf404..7eca618 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -11,7 +11,7 @@ hacked-up (non-) design had also run out of steam. Written by Philip Hazel Original code Copyright (c) 1997-2012 University of Cambridge - Rewritten code Copyright (c) 2016-2017 University of Cambridge + Rewritten code Copyright (c) 2016-2018 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -6761,13 +6761,17 @@ if ((pat_patctl.control & CTL_POSIX) != 0) fprintf(outfile, "Matched without capture\n"); else { - size_t i; + size_t i, j; + size_t last_printed = (size_t)dat_datctl.oveccount; for (i = 0; i < (size_t)dat_datctl.oveccount; i++) { if (pmatch[i].rm_so >= 0) { PCRE2_SIZE start = pmatch[i].rm_so; PCRE2_SIZE end = pmatch[i].rm_eo; + for (j = last_printed + 1; j < i; j++) + fprintf(outfile, "%2d: \n", (int)j); + last_printed = i; if (start > end) { start = pmatch[i].rm_eo; diff --git a/testdata/testinput18 b/testdata/testinput18 index 755a0c9..563a506 100644 --- a/testdata/testinput18 +++ b/testdata/testinput18 @@ -134,4 +134,8 @@ /a\b(c/literal,posix,dotall +/((a)(b)?(c))/posix + 123ace + 123ace\=posix_startend=2:6 + # 
End of testdata/testinput18 diff --git a/testdata/testoutput18 b/testdata/testoutput18 index d51423d..d6e3c71 100644 --- a/testdata/testoutput18 +++ b/testdata/testoutput18 @@ -46,6 +46,7 @@ defabc\=noteol 0: def 1: def + 2: 3: def /the quick brown fox/ @@ -206,4 +207,18 @@ No match: POSIX code 17: match failed /a\b(c/literal,posix,dotall Failed: POSIX code 16: bad argument at offset 0 +/((a)(b)?(c))/posix + 123ace + 0: ac + 1: ac + 2: a + 3: + 4: c + 123ace\=posix_startend=2:6 + 0: ac + 1: ac + 2: a + 3: + 4: c + # End of testdata/testinput18