From 170644eca3a1edb5e91b401ab21b92d010df8cde Mon Sep 17 00:00:00 2001 From: "Philip.Hazel" Date: Sat, 29 Aug 2015 17:13:09 +0000 Subject: [PATCH] Add ${*MARK} feature to pcre2_substitute(). --- ChangeLog | 2 ++ doc/pcre2api.3 | 21 ++++++++++------ src/pcre2_internal.h | 2 ++ src/pcre2_substitute.c | 54 +++++++++++++++++++++++++++++++++--------- testdata/testinput2 | 31 ++++++++++++++++++++++++ testdata/testoutput2 | 44 ++++++++++++++++++++++++++++++++++ 6 files changed, 136 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index b53f922..9c6c413 100644 --- a/ChangeLog +++ b/ChangeLog @@ -162,6 +162,8 @@ test (there are now 20 in total). 45. Fixed a corner case of range optimization in JIT. +46. Add the ${*MARK} facility to pcre2_substitute(). + Version 10.20 30-June-2015 -------------------------- diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 7cd20d3..b888223 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21" +.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -2614,12 +2614,12 @@ be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. In the replacement string, which is interpreted as a UTF string in UTF mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a dollar character is an escape character that can specify the insertion of -characters from capturing groups in the pattern. The following forms are -recognized: +characters from capturing groups or (*MARK) items in the pattern. The following +forms are recognized: .sp - $$ insert a dollar character - $<n> insert the contents of group <n> - ${<n>} insert the contents of group <n> + $$ insert a dollar character + $<n> or ${<n>} insert the contents of group <n> + $*MARK or ${*MARK} insert the name of the last (*MARK) encountered .sp Either a group number or a group name can be given for <n>. 
Curly brackets are required only if the following character would be interpreted as part of the @@ -2629,6 +2629,13 @@ string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as appropriate. .P +The facility for inserting a (*MARK) name can be used to perform simple +simultaneous substitutions, as this \fBpcre2test\fP example shows: +.sp + /(*:pear)apple|(*:orange)lemon/g,replace=${*MARK} + apple lemon + 2: pear orange +.P The first seven arguments of \fBpcre2_substitute()\fP are the same as for \fBpcre2_match()\fP, except that the partial matching options are not permitted, and \fImatch_data\fP may be passed as NULL, in which case a match @@ -2946,6 +2953,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 18 August 2015 +Last updated: 29 August 2015 Copyright (c) 1997-2015 University of Cambridge. .fi diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h index 19564a5..8bb745c 100644 --- a/src/pcre2_internal.h +++ b/src/pcre2_internal.h @@ -918,6 +918,7 @@ a positive value. */ #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" +#define STRING_MARK "MARK" #else /* SUPPORT_UNICODE */ @@ -1190,6 +1191,7 @@ only. 
*/ #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN +#define STRING_MARK STR_M STR_A STR_R STR_K #endif /* SUPPORT_UNICODE */ diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c index ec00ebb..f7965c3 100644 --- a/src/pcre2_substitute.c +++ b/src/pcre2_substitute.c @@ -205,6 +205,7 @@ do { int group, n; BOOL inparens; + BOOL star; PCRE2_SIZE sublength; PCRE2_UCHAR next; PCRE2_UCHAR name[33]; @@ -215,6 +216,7 @@ do group = -1; n = 0; inparens = FALSE; + star = FALSE; if (next == CHAR_LEFT_CURLY_BRACKET) { @@ -223,7 +225,14 @@ do inparens = TRUE; } - if (next >= CHAR_0 && next <= CHAR_9) + if (next == CHAR_ASTERISK) + { + if (++i == rlength) goto BAD; + next = replacement[i]; + star = TRUE; + } + + if (!star && next >= CHAR_0 && next <= CHAR_9) { group = next - CHAR_0; while (++i < rlength) @@ -253,19 +262,42 @@ do } else i--; /* Last code unit of name/number */ - /* Have found a syntactically correct group number or name. */ + /* Have found a syntactically correct group number or name, or + *name. Only *MARK is currently recognized. */ + + if (star) + { + if (PRIV(strcmp_c8)(name, STRING_MARK) == 0) + { + PCRE2_SPTR mark = pcre2_get_mark(match_data); + if (mark != NULL) + { + while (*mark != 0) + { + if (lengthleft-- < 1) goto NOROOM; + buffer[buff_offset++] = *mark++; + } + } + } + else goto BAD; + } + + /* Substitute the contents of a group. 
*/ - sublength = lengthleft; - if (group < 0) - rc = pcre2_substring_copy_byname(match_data, name, - buffer + buff_offset, &sublength); else - rc = pcre2_substring_copy_bynumber(match_data, group, - buffer + buff_offset, &sublength); + { + sublength = lengthleft; + if (group < 0) + rc = pcre2_substring_copy_byname(match_data, name, + buffer + buff_offset, &sublength); + else + rc = pcre2_substring_copy_bynumber(match_data, group, + buffer + buff_offset, &sublength); + if (rc < 0) goto EXIT; - if (rc < 0) goto EXIT; - buff_offset += sublength; - lengthleft -= sublength; + buff_offset += sublength; + lengthleft -= sublength; + } } /* Handle a literal code unit */ diff --git a/testdata/testinput2 b/testdata/testinput2 index a4131d5..86b658f 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4073,6 +4073,37 @@ a random value. /Ix /(.)(.)/g,replace=$2$1 abcdefgh + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + apple strudel + fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry # End of substitute tests diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 36612b9..9a1e65b 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -13731,6 +13731,50 @@ Failed: error -34: bad option value 
/(.)(.)/g,replace=$2$1 abcdefgh 4: badcfehg + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK} + apple lemon blackberry + 3: pear orange strawberry + apple strudel + 1: pear strudel + fruitless + 0: fruitless + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK} + apple lemon blackberry + 1: pear lemon blackberry + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK> + apple lemon blackberry + 3: <pear> <orange> <strawberry> + apple strudel + 1: <pear> strudel + fruitless + 0: fruitless + +/(*:pear)apple/g,replace=${*MARKING} + apple lemon blackberry +Failed: error -35: invalid replacement string + +/(*:pear)apple/g,replace=${*MARK-time + apple lemon blackberry +Failed: error -35: invalid replacement string + +/(*:pear)apple/g,replace=${*mark} + apple lemon blackberry +Failed: error -35: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET> + apple lemon blackberry +Failed: error -35: invalid replacement string + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK} + apple lemon blackberry +Failed: error -48: no more memory + +/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK} + apple lemon blackberry + 3: pear orange strawberry # End of substitute tests