Add ${*MARK} feature to pcre2_substitute().

This commit is contained in:
Philip.Hazel 2015-08-29 17:13:09 +00:00
parent e40bc47ae2
commit 170644eca3
6 changed files with 136 additions and 18 deletions

View File

@@ -162,6 +162,8 @@ test (there are now 20 in total).
 45. Fixed a corner case of range optimization in JIT.
+46. Add the ${*MARK} facility to pcre2_substitute().
 Version 10.20 30-June-2015
 --------------------------

View File

@@ -1,4 +1,4 @@
-.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
+.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
 .SH NAME
 PCRE2 - Perl-compatible regular expressions (revised API)
 .sp
@@ -2614,12 +2614,12 @@ be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
 In the replacement string, which is interpreted as a UTF string in UTF mode,
 and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
 dollar character is an escape character that can specify the insertion of
-characters from capturing groups in the pattern. The following forms are
-recognized:
+characters from capturing groups or (*MARK) items in the pattern. The following
+forms are recognized:
 .sp
 $$ insert a dollar character
-$<n> insert the contents of group <n>
-${<n>} insert the contents of group <n>
+$<n> or ${<n>} insert the contents of group <n>
+$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
 .sp
 Either a group number or a group name can be given for <n>. Curly brackets are
 required only if the following character would be interpreted as part of the
@@ -2629,6 +2629,13 @@ string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by
 calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as
 appropriate.
 .P
+The facility for inserting a (*MARK) name can be used to perform simple
+simultaneous substitutions, as this \fBpcre2test\fP example shows:
+.sp
+/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
+apple lemon
+2: pear orange
+.P
 The first seven arguments of \fBpcre2_substitute()\fP are the same as for
 \fBpcre2_match()\fP, except that the partial matching options are not
 permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
@@ -2946,6 +2953,6 @@ Cambridge, England.
 .rs
 .sp
 .nf
-Last updated: 18 August 2015
+Last updated: 29 August 2015
 Copyright (c) 1997-2015 University of Cambridge.
 .fi

View File

@ -918,6 +918,7 @@ a positive value. */
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#define STRING_MARK "MARK"
#else /* SUPPORT_UNICODE */ #else /* SUPPORT_UNICODE */
@ -1190,6 +1191,7 @@ only. */
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#define STRING_MARK STR_M STR_A STR_R STR_K
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */

View File

@ -205,6 +205,7 @@ do
{ {
int group, n; int group, n;
BOOL inparens; BOOL inparens;
BOOL star;
PCRE2_SIZE sublength; PCRE2_SIZE sublength;
PCRE2_UCHAR next; PCRE2_UCHAR next;
PCRE2_UCHAR name[33]; PCRE2_UCHAR name[33];
@ -215,6 +216,7 @@ do
group = -1; group = -1;
n = 0; n = 0;
inparens = FALSE; inparens = FALSE;
star = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET) if (next == CHAR_LEFT_CURLY_BRACKET)
{ {
@ -223,7 +225,14 @@ do
inparens = TRUE; inparens = TRUE;
} }
if (next >= CHAR_0 && next <= CHAR_9) if (next == CHAR_ASTERISK)
{
if (++i == rlength) goto BAD;
next = replacement[i];
star = TRUE;
}
if (!star && next >= CHAR_0 && next <= CHAR_9)
{ {
group = next - CHAR_0; group = next - CHAR_0;
while (++i < rlength) while (++i < rlength)
@ -253,19 +262,42 @@ do
} }
else i--; /* Last code unit of name/number */ else i--; /* Last code unit of name/number */
/* Have found a syntactically correct group number or name. */ /* Have found a syntactically correct group number or name, or
*name. Only *MARK is currently recognized. */
if (star)
{
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
{
PCRE2_SPTR mark = pcre2_get_mark(match_data);
if (mark != NULL)
{
while (*mark != 0)
{
if (lengthleft-- < 1) goto NOROOM;
buffer[buff_offset++] = *mark++;
}
}
}
else goto BAD;
}
/* Substitute the contents of a group. */
sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else else
rc = pcre2_substring_copy_bynumber(match_data, group, {
buffer + buff_offset, &sublength); sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else
rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength);
if (rc < 0) goto EXIT;
if (rc < 0) goto EXIT; buff_offset += sublength;
buff_offset += sublength; lengthleft -= sublength;
lengthleft -= sublength; }
} }
/* Handle a literal code unit */ /* Handle a literal code unit */

31
testdata/testinput2 vendored
View File

@ -4073,6 +4073,37 @@ a random value. /Ix
/(.)(.)/g,replace=$2$1 /(.)(.)/g,replace=$2$1
abcdefgh abcdefgh
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
apple lemon blackberry
apple strudel
fruitless
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
apple lemon blackberry
apple strudel
fruitless
/(*:pear)apple/g,replace=${*MARKING}
apple lemon blackberry
/(*:pear)apple/g,replace=${*MARK-time
apple lemon blackberry
/(*:pear)apple/g,replace=${*mark}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
apple lemon blackberry
# End of substitute tests # End of substitute tests

44
testdata/testoutput2 vendored
View File

@ -13731,6 +13731,50 @@ Failed: error -34: bad option value
/(.)(.)/g,replace=$2$1 /(.)(.)/g,replace=$2$1
abcdefgh abcdefgh
4: badcfehg 4: badcfehg
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
apple lemon blackberry
3: pear orange strawberry
apple strudel
1: pear strudel
fruitless
0: fruitless
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
apple lemon blackberry
1: pear lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
apple lemon blackberry
3: <pear> <orange> <strawberry>
apple strudel
1: <pear> strudel
fruitless
0: fruitless
/(*:pear)apple/g,replace=${*MARKING}
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple/g,replace=${*MARK-time
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple/g,replace=${*mark}
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
apple lemon blackberry
Failed: error -48: no more memory
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
apple lemon blackberry
3: pear orange strawberry
# End of substitute tests # End of substitute tests