Add ${*MARK} feature to pcre2_substitute().

This commit is contained in:
Philip.Hazel 2015-08-29 17:13:09 +00:00
parent e40bc47ae2
commit 170644eca3
6 changed files with 136 additions and 18 deletions

View File

@ -162,6 +162,8 @@ test (there are now 20 in total).
45. Fixed a corner case of range optimization in JIT.
46. Add the ${*MARK} facility to pcre2_substitute().
Version 10.20 30-June-2015
--------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "18 August 2015" "PCRE2 10.21"
.TH PCRE2API 3 "29 August 2015" "PCRE2 10.21"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -2614,12 +2614,12 @@ be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
In the replacement string, which is interpreted as a UTF string in UTF mode,
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
dollar character is an escape character that can specify the insertion of
characters from capturing groups in the pattern. The following forms are
recognized:
characters from capturing groups or (*MARK) items in the pattern. The following
forms are recognized:
.sp
$$ insert a dollar character
$<n> insert the contents of group <n>
${<n>} insert the contents of group <n>
$$ insert a dollar character
$<n> or ${<n>} insert the contents of group <n>
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
.sp
Either a group number or a group name can be given for <n>. Curly brackets are
required only if the following character would be interpreted as part of the
@ -2629,6 +2629,13 @@ string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by
calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as
appropriate.
.P
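A name that follows an asterisk must be exactly MARK, in upper case; any other
name causes the replacement string to be rejected as invalid. If no (*MARK)
name is available after a match, nothing is inserted. The facility for
inserting a (*MARK) name can be used to perform simple simultaneous
substitutions, as this \fBpcre2test\fP example shows: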
.sp
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
apple lemon
2: pear orange
.P
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
\fBpcre2_match()\fP, except that the partial matching options are not
permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
@ -2946,6 +2953,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 18 August 2015
Last updated: 29 August 2015
Copyright (c) 1997-2015 University of Cambridge.
.fi

View File

@ -918,6 +918,7 @@ a positive value. */
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
#define STRING_MARK "MARK"
#else /* SUPPORT_UNICODE */
@ -1190,6 +1191,7 @@ only. */
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
#define STRING_MARK STR_M STR_A STR_R STR_K
#endif /* SUPPORT_UNICODE */

View File

@ -205,6 +205,7 @@ do
{
int group, n;
BOOL inparens;
BOOL star;
PCRE2_SIZE sublength;
PCRE2_UCHAR next;
PCRE2_UCHAR name[33];
@ -215,6 +216,7 @@ do
group = -1;
n = 0;
inparens = FALSE;
star = FALSE;
if (next == CHAR_LEFT_CURLY_BRACKET)
{
@ -223,7 +225,14 @@ do
inparens = TRUE;
}
if (next >= CHAR_0 && next <= CHAR_9)
if (next == CHAR_ASTERISK)
{
if (++i == rlength) goto BAD;
next = replacement[i];
star = TRUE;
}
if (!star && next >= CHAR_0 && next <= CHAR_9)
{
group = next - CHAR_0;
while (++i < rlength)
@ -253,19 +262,42 @@ do
}
else i--; /* Last code unit of name/number */
/* Have found a syntactically correct group number or name. */
/* Have found a syntactically correct group number or name, or an asterisk
followed by a name (*name). Only *MARK is currently recognized; any other
*name is treated as a bad replacement string. */
if (star)
{
if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
{
PCRE2_SPTR mark = pcre2_get_mark(match_data);
if (mark != NULL)
{
while (*mark != 0)
{
if (lengthleft-- < 1) goto NOROOM;
buffer[buff_offset++] = *mark++;
}
}
}
else goto BAD;
}
/* Substitute the contents of a group. */
sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else
rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength);
{
sublength = lengthleft;
if (group < 0)
rc = pcre2_substring_copy_byname(match_data, name,
buffer + buff_offset, &sublength);
else
rc = pcre2_substring_copy_bynumber(match_data, group,
buffer + buff_offset, &sublength);
if (rc < 0) goto EXIT;
if (rc < 0) goto EXIT;
buff_offset += sublength;
lengthleft -= sublength;
buff_offset += sublength;
lengthleft -= sublength;
}
}
/* Handle a literal code unit */

31
testdata/testinput2 vendored
View File

@ -4074,6 +4074,37 @@ a random value. /Ix
/(.)(.)/g,replace=$2$1
abcdefgh
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
apple lemon blackberry
apple strudel
fruitless
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
apple lemon blackberry
apple strudel
fruitless
/(*:pear)apple/g,replace=${*MARKING}
apple lemon blackberry
/(*:pear)apple/g,replace=${*MARK-time
apple lemon blackberry
/(*:pear)apple/g,replace=${*mark}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
apple lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
apple lemon blackberry
# End of substitute tests
"((?=(?(?=(?(?=(?(?=()))))))))"

44
testdata/testoutput2 vendored
View File

@ -13732,6 +13732,50 @@ Failed: error -34: bad option value
abcdefgh
4: badcfehg
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=${*MARK}
apple lemon blackberry
3: pear orange strawberry
apple strudel
1: pear strudel
fruitless
0: fruitless
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/replace=${*MARK}
apple lemon blackberry
1: pear lemon blackberry
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARK>
apple lemon blackberry
3: <pear> <orange> <strawberry>
apple strudel
1: <pear> strudel
fruitless
0: fruitless
/(*:pear)apple/g,replace=${*MARKING}
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple/g,replace=${*MARK-time
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple/g,replace=${*mark}
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
apple lemon blackberry
Failed: error -35: invalid replacement string
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
apple lemon blackberry
Failed: error -48: no more memory
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[23]${*MARK}
apple lemon blackberry
3: pear orange strawberry
# End of substitute tests
"((?=(?(?=(?(?=(?(?=()))))))))"