Implement PCRE2_SUBSTITUTE_EXTENDED.
This commit is contained in:
parent
f64749b40a
commit
6ae5c36e83
|
@ -192,6 +192,8 @@ pcre2test (and perltest.sh) input.
|
||||||
54. Add the null_context modifier to pcre2test so that calling pcre2_compile()
|
54. Add the null_context modifier to pcre2test so that calling pcre2_compile()
|
||||||
and the matching functions with NULL contexts can be tested.
|
and the matching functions with NULL contexts can be tested.
|
||||||
|
|
||||||
|
55. Implemented PCRE2_SUBSTITUTE_EXTENDED.
|
||||||
|
|
||||||
|
|
||||||
Version 10.20 30-June-2015
|
Version 10.20 30-June-2015
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_SUBSTITUTE 3 "11 November 2014" "PCRE2 10.00"
|
.TH PCRE2_SUBSTITUTE 3 "06 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -47,20 +47,22 @@ units, not characters, as is the contents of the variable pointed at by
|
||||||
\fIoutlengthptr\fP, which is updated to the actual length of the new string.
|
\fIoutlengthptr\fP, which is updated to the actual length of the new string.
|
||||||
The options are:
|
The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject string is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
PCRE2_NOTEMPTY An empty string is not a valid match
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the
|
||||||
is not a valid match
|
subject is not a valid match
|
||||||
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
|
PCRE2_NO_UTF_CHECK Do not check the subject or replacement
|
||||||
UTF validity (only relevant if PCRE2_UTF
|
for UTF validity (only relevant if
|
||||||
was set at compile time)
|
PCRE2_UTF was set at compile time)
|
||||||
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||||
.sp
|
.sp
|
||||||
The function returns the number of substitutions, which may be zero if there
|
The function returns the number of substitutions, which may be zero if there
|
||||||
were no matches. The result can be greater than one only when
|
were no matches. The result can be greater than one only when
|
||||||
PCRE2_SUBSTITUTE_GLOBAL is set.
|
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||||
|
is returned.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
|
145
doc/pcre2api.3
145
doc/pcre2api.3
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "22 September 2015" "PCRE2 10.21"
|
.TH PCRE2API 3 "07 October 2015" "PCRE2 10.21"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -1170,7 +1170,7 @@ built.
|
||||||
.sp
|
.sp
|
||||||
If this option is set, an unanchored pattern is required to match before or at
|
If this option is set, an unanchored pattern is required to match before or at
|
||||||
the first newline in the subject string, though the matched text may continue
|
the first newline in the subject string, though the matched text may continue
|
||||||
over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
|
over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
|
||||||
general limiting facility.
|
general limiting facility.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_MATCH_UNSET_BACKREF
|
PCRE2_MATCH_UNSET_BACKREF
|
||||||
|
@ -1367,8 +1367,8 @@ with Perl. It can also be set by a (?U) option setting within the pattern.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_USE_OFFSET_LIMIT
|
PCRE2_USE_OFFSET_LIMIT
|
||||||
.sp
|
.sp
|
||||||
This option must be set for \fBpcre2_compile()\fP if
|
This option must be set for \fBpcre2_compile()\fP if
|
||||||
\fBpcre2_set_offset_limit()\fP is going to be used to set a non-default offset
|
\fBpcre2_set_offset_limit()\fP is going to be used to set a non-default offset
|
||||||
limit in a match context for matches that use this pattern. An error is
|
limit in a match context for matches that use this pattern. An error is
|
||||||
generated if an offset limit is set without this option. For more details, see
|
generated if an offset limit is set without this option. For more details, see
|
||||||
the description of \fBpcre2_set_offset_limit()\fP in the
|
the description of \fBpcre2_set_offset_limit()\fP in the
|
||||||
|
@ -2657,40 +2657,16 @@ same number causes an error at compile time.
|
||||||
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
|
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
|
||||||
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||||
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||||
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacementzfP,"
|
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||||
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||||
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||||
.fi
|
.fi
|
||||||
|
.P
|
||||||
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||||
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
||||||
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
||||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
.P
|
.P
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
|
||||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
|
||||||
dollar character is an escape character that can specify the insertion of
|
|
||||||
characters from capturing groups or (*MARK) items in the pattern. The following
|
|
||||||
forms are recognized:
|
|
||||||
.sp
|
|
||||||
$$ insert a dollar character
|
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
|
||||||
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
|
||||||
.sp
|
|
||||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
|
||||||
required only if the following character would be interpreted as part of the
|
|
||||||
number or name. The number may be zero to include the entire matched string.
|
|
||||||
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
|
||||||
string "+$1$0$1+", the result is "=+babcb+=". Group insertion is done by
|
|
||||||
calling \fBpcre2_copy_byname()\fP or \fBpcre2_copy_bynumber()\fP as
|
|
||||||
appropriate.
|
|
||||||
.P
|
|
||||||
The facility for inserting a (*MARK) name can be used to perform simple
|
|
||||||
simultaneous substitutions, as this \fBpcre2test\fP example shows:
|
|
||||||
.sp
|
|
||||||
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
|
||||||
apple lemon
|
|
||||||
2: pear orange
|
|
||||||
.P
|
|
||||||
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||||
\fBpcre2_match()\fP, except that the partial matching options are not
|
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||||
permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
|
permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
|
||||||
|
@ -2698,23 +2674,104 @@ data block is obtained and freed within this function, using memory management
|
||||||
functions from the match context, if provided, or else those that were used to
|
functions from the match context, if provided, or else those that were used to
|
||||||
allocate memory for the compiled code.
|
allocate memory for the compiled code.
|
||||||
.P
|
.P
|
||||||
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. If the function is successful,
|
||||||
|
the value is updated to contain the length of the new string, excluding the
|
||||||
|
trailing zero that is automatically added. If the function is not successful,
|
||||||
|
the value is set to PCRE2_UNSET for general errors (such as output buffer too
|
||||||
|
small). For syntax errors in the replacement string, the value is set to the
|
||||||
|
offset in the replacement string where the error was detected.
|
||||||
|
.P
|
||||||
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
|
dollar character is an escape character that can specify the insertion of
|
||||||
|
characters from capturing groups or (*MARK) items in the pattern. The following
|
||||||
|
forms are always recognized:
|
||||||
|
.sp
|
||||||
|
$$ insert a dollar character
|
||||||
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
|
$*MARK or ${*MARK} insert the name of the last (*MARK) encountered
|
||||||
|
.sp
|
||||||
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
|
required only if the following character would be interpreted as part of the
|
||||||
|
number or name. The number may be zero to include the entire matched string.
|
||||||
|
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||||
|
string "+$1$0$1+", the result is "=+babcb+=".
|
||||||
|
.P
|
||||||
|
The facility for inserting a (*MARK) name can be used to perform simple
|
||||||
|
simultaneous substitutions, as this \fBpcre2test\fP example shows:
|
||||||
|
.sp
|
||||||
|
/(*:pear)apple|(*:orange)lemon/g,replace=${*MARK}
|
||||||
|
apple lemon
|
||||||
|
2: pear orange
|
||||||
|
.sp
|
||||||
|
There is an additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
||||||
function to iterate over the subject string, replacing every matching
|
function to iterate over the subject string, replacing every matching
|
||||||
substring. If this is not set, only the first matching substring is replaced.
|
substring. If this is not set, only the first matching substring is replaced.
|
||||||
.P
|
.P
|
||||||
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
A second additional option, PCRE2_SUBSTITUTE_EXTENDED, causes extra processing
|
||||||
length, in code units, of the output buffer. It is updated to contain the
|
to be applied to the replacement string. Without this option, only the dollar
|
||||||
length of the new string, excluding the trailing zero that is automatically
|
character is special, and only the group insertion forms listed above are
|
||||||
added.
|
valid. When PCRE2_SUBSTITUTE_EXTENDED is set, two things change:
|
||||||
.P
|
.P
|
||||||
The function returns the number of replacements that were made. This may be
|
Firstly, backslash in a replacement string is interpreted as an escape
|
||||||
zero if no matches were found, and is never greater than 1 unless
|
character. The usual forms such as \en or \ex{ddd} can be used to specify
|
||||||
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
particular character codes, and backslash followed by any non-alphanumeric
|
||||||
is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any
|
character quotes that character. Extended quoting can be coded using \eQ...\eE,
|
||||||
errors from \fBpcre2_match()\fP or the substring copying functions are passed
|
exactly as in pattern strings.
|
||||||
straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
.P
|
||||||
replacement string (unrecognized sequence following a dollar sign), and
|
There are also four escape sequences for forcing the case of inserted letters.
|
||||||
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
The insertion mechanism has three states: no case forcing, force upper case,
|
||||||
|
and force lower case. The escape sequences change the current state: \eU and
|
||||||
|
\eL change to upper or lower case forcing, respectively, and \eE (when not
|
||||||
|
terminating a \eQ quoted sequence) reverts to no case forcing. The sequences
|
||||||
|
\eu and \el force the next character (if it is a letter) to upper or lower
|
||||||
|
case, respectively, and then the state automatically reverts to no case
|
||||||
|
forcing. Case forcing applies to all inserted characters, including those from
|
||||||
|
captured groups and letters within \eQ...\eE quoted sequences.
|
||||||
|
.P
|
||||||
|
Note that case forcing sequences such as \eU...\eE do not nest. For example,
|
||||||
|
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
|
||||||
|
effect.
|
||||||
|
.P
|
||||||
|
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||||
|
flexibility to group substitution. The syntax is similar to that used by Bash:
|
||||||
|
.sp
|
||||||
|
${<n>:-<string>}
|
||||||
|
${<n>:+<string1>:<string2>}
|
||||||
|
.sp
|
||||||
|
As before, <n> may be a group number or a name. The first form specifies a
|
||||||
|
default value. If group <n> is set, its value is inserted; if not, <string> is
|
||||||
|
expanded and the result inserted. The second form specifies strings that are
|
||||||
|
expanded and inserted when group <n> is set or unset, respectively. The first
|
||||||
|
form is just a convenient shorthand for
|
||||||
|
.sp
|
||||||
|
${<n>:+${<n>}:<string>}
|
||||||
|
.sp
|
||||||
|
Backslash can be used to escape colons and closing curly brackets in the
|
||||||
|
replacement strings. A change of the case forcing state within a replacement
|
||||||
|
string remains in force afterwards, as shown in this \fBpcre2test\fP example:
|
||||||
|
.sp
|
||||||
|
/(some)?(body)/substitute_extended,replace=${1:+\eU:\eL}HeLLo
|
||||||
|
body
|
||||||
|
1: hello
|
||||||
|
somebody
|
||||||
|
1: HELLO
|
||||||
|
.sp
|
||||||
|
If successful, the function returns the number of replacements that were made.
|
||||||
|
This may be zero if no matches were found, and is never greater than 1 unless
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
|
.P
|
||||||
|
In the event of an error, a negative error code is returned. Except for
|
||||||
|
PCRE2_ERROR_NOMATCH (which is never returned), errors from \fBpcre2_match()\fP
|
||||||
|
are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
||||||
|
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||||
|
errors in the replacement string, with more particular errors being
|
||||||
|
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||||
|
PCRE2_ERROR_REPMISSINGBRACE (closing curly bracket not found), and
|
||||||
|
PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
||||||
|
PCRE2 errors, a text message that describes the error can be obtained by
|
||||||
|
calling \fBpcre2_get_error_message()\fP.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||||
|
@ -3008,6 +3065,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 22 September 2015
|
Last updated: 07 October 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -146,9 +146,10 @@ sanity checks). */
|
||||||
#define PCRE2_DFA_RESTART 0x00000040u
|
#define PCRE2_DFA_RESTART 0x00000040u
|
||||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||||
|
|
||||||
/* This is an additional option for pcre2_substitute(). */
|
/* These are additional options for pcre2_substitute(). */
|
||||||
|
|
||||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||||
|
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||||
|
|
||||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||||
must be kept in step with values set in config.h and both sets must all be
|
must be kept in step with values set in config.h and both sets must all be
|
||||||
|
@ -236,6 +237,9 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||||
#define PCRE2_ERROR_UNSET (-55)
|
#define PCRE2_ERROR_UNSET (-55)
|
||||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||||
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
|
|
@ -146,9 +146,10 @@ sanity checks). */
|
||||||
#define PCRE2_DFA_RESTART 0x00000040u
|
#define PCRE2_DFA_RESTART 0x00000040u
|
||||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||||
|
|
||||||
/* This is an additional option for pcre2_substitute(). */
|
/* These are additional options for pcre2_substitute(). */
|
||||||
|
|
||||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||||
|
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
|
||||||
|
|
||||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||||
must be kept in step with values set in config.h and both sets must all be
|
must be kept in step with values set in config.h and both sets must all be
|
||||||
|
@ -236,6 +237,9 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||||
#define PCRE2_ERROR_UNSET (-55)
|
#define PCRE2_ERROR_UNSET (-55)
|
||||||
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
#define PCRE2_ERROR_BADOFFSETLIMIT (-56)
|
||||||
|
#define PCRE2_ERROR_BADREPESCAPE (-57)
|
||||||
|
#define PCRE2_ERROR_REPMISSINGBRACE (-58)
|
||||||
|
#define PCRE2_ERROR_BADSUBSTITUTION (-59)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
|
|
@ -1612,8 +1612,15 @@ is placed in chptr. A backreference to group n is returned as negative n. On
|
||||||
entry, ptr is pointing at the \. On exit, it points to the final code unit of the
|
entry, ptr is pointing at the \. On exit, it points to the final code unit of the
|
||||||
escape sequence.
|
escape sequence.
|
||||||
|
|
||||||
|
This function is also called from pcre2_substitute() to handle escape sequences
|
||||||
|
in replacement strings. In this case, the cb argument is NULL, and only
|
||||||
|
sequences that define a data character are recognised. The isclass argument is
|
||||||
|
not relevant, but the options argument is the final value of the compiled
|
||||||
|
pattern's options.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
ptrptr points to the pattern position pointer
|
ptrptr points to the input position pointer
|
||||||
|
ptrend points to the end of the input
|
||||||
chptr points to a returned data character
|
chptr points to a returned data character
|
||||||
errorcodeptr points to the errorcode variable (containing zero)
|
errorcodeptr points to the errorcode variable (containing zero)
|
||||||
options the current options bits
|
options the current options bits
|
||||||
|
@ -1626,9 +1633,9 @@ Returns: zero => a data character
|
||||||
on error, errorcodeptr is set non-zero
|
on error, errorcodeptr is set non-zero
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
int
|
||||||
check_escape(PCRE2_SPTR *ptrptr, uint32_t *chptr, int *errorcodeptr,
|
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
||||||
uint32_t options, BOOL isclass, compile_block *cb)
|
int *errorcodeptr, uint32_t options, BOOL isclass, compile_block *cb)
|
||||||
{
|
{
|
||||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||||
PCRE2_SPTR ptr = *ptrptr + 1;
|
PCRE2_SPTR ptr = *ptrptr + 1;
|
||||||
|
@ -1636,19 +1643,23 @@ register uint32_t c, cc;
|
||||||
int escape = 0;
|
int escape = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
|
|
||||||
ptr--; /* Set pointer back to the last code unit */
|
|
||||||
|
|
||||||
/* If backslash is at the end of the pattern, it's an error. */
|
/* If backslash is at the end of the pattern, it's an error. */
|
||||||
|
|
||||||
if (c == CHAR_NULL && ptr >= cb->end_pattern) *errorcodeptr = ERR1;
|
if (ptr >= ptrend)
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
GETCHARINCTEST(c, ptr); /* Get character value, increment pointer */
|
||||||
|
ptr--; /* Set pointer back to the last code unit */
|
||||||
|
|
||||||
/* Non-alphanumerics are literals, so we just leave the value in c. An initial
|
/* Non-alphanumerics are literals, so we just leave the value in c. An initial
|
||||||
value test saves a memory lookup for code points outside the alphanumeric
|
value test saves a memory lookup for code points outside the alphanumeric
|
||||||
range. Otherwise, do a table lookup. A non-zero result is something that can be
|
range. Otherwise, do a table lookup. A non-zero result is something that can be
|
||||||
returned immediately. Otherwise further processing is required. */
|
returned immediately. Otherwise further processing is required. */
|
||||||
|
|
||||||
else if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */
|
if (c < ESCAPES_FIRST || c > ESCAPES_LAST) {} /* Definitely literal */
|
||||||
|
|
||||||
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||||||
{
|
{
|
||||||
|
@ -1660,13 +1671,24 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Escapes that need further processing, including those that are unknown. */
|
/* Escapes that need further processing, including those that are unknown.
|
||||||
|
When called from pcre2_substitute(), only \c, \o, and \x are recognized (and \u
|
||||||
|
when BSUX is set). */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PCRE2_SPTR oldptr;
|
PCRE2_SPTR oldptr;
|
||||||
BOOL braced, negated, overflow;
|
BOOL braced, negated, overflow;
|
||||||
unsigned int s;
|
unsigned int s;
|
||||||
|
|
||||||
|
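/* Filter calls from pcre2_substitute(). NOTE(review): as written, the test below rejects \u when PCRE2_ALT_BSUX is set and lets it through when it is not, which contradicts the comment above ("\u when BSUX is set"); the flag comparison probably wants == 0 - confirm. */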
|
||||||
|
|
||||||
|
if (cb == NULL && c != CHAR_c && c != CHAR_o && c != CHAR_x &&
|
||||||
|
(c != CHAR_u || (options & PCRE2_ALT_BSUX) != 0))
|
||||||
|
{
|
||||||
|
*errorcodeptr = ERR3;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
|
@ -2020,7 +2042,7 @@ else
|
||||||
|
|
||||||
c = *(++ptr);
|
c = *(++ptr);
|
||||||
if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
|
if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
|
||||||
if (c == CHAR_NULL && ptr >= cb->end_pattern)
|
if (c == CHAR_NULL && ptr >= ptrend)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR2;
|
*errorcodeptr = ERR2;
|
||||||
break;
|
break;
|
||||||
|
@ -2874,7 +2896,8 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
*errorcodeptr = 0;
|
*errorcodeptr = 0;
|
||||||
rc = check_escape(&ptr, &x, errorcodeptr, options, FALSE, cb);
|
rc = PRIV(check_escape)(&ptr, cb->end_pattern, &x, errorcodeptr, options,
|
||||||
|
FALSE, cb);
|
||||||
*ptrptr = ptr; /* For possible error */
|
*ptrptr = ptr; /* For possible error */
|
||||||
if (*errorcodeptr != 0) return -1;
|
if (*errorcodeptr != 0) return -1;
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
|
@ -3048,7 +3071,8 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
|
|
||||||
case CHAR_BACKSLASH:
|
case CHAR_BACKSLASH:
|
||||||
errorcode = 0;
|
errorcode = 0;
|
||||||
escape = check_escape(&ptr, &c, &errorcode, options, FALSE, cb);
|
escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode, options,
|
||||||
|
FALSE, cb);
|
||||||
if (errorcode != 0) goto FAILED;
|
if (errorcode != 0) goto FAILED;
|
||||||
if (escape == ESC_Q) inescq = TRUE;
|
if (escape == ESC_Q) inescq = TRUE;
|
||||||
break;
|
break;
|
||||||
|
@ -3132,7 +3156,8 @@ for (; ptr < cb->end_pattern; ptr++)
|
||||||
else if (c == CHAR_BACKSLASH)
|
else if (c == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
errorcode = 0;
|
errorcode = 0;
|
||||||
escape = check_escape(&ptr, &c, &errorcode, options, TRUE, cb);
|
escape = PRIV(check_escape)(&ptr, cb->end_pattern, &c, &errorcode,
|
||||||
|
options, TRUE, cb);
|
||||||
if (errorcode != 0) goto FAILED;
|
if (errorcode != 0) goto FAILED;
|
||||||
if (escape == ESC_Q) inescq = TRUE;
|
if (escape == ESC_Q) inescq = TRUE;
|
||||||
}
|
}
|
||||||
|
@ -4195,7 +4220,8 @@ for (;; ptr++)
|
||||||
|
|
||||||
if (c == CHAR_BACKSLASH)
|
if (c == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
escape = check_escape(&ptr, &ec, errorcodeptr, options, TRUE, cb);
|
escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
|
||||||
|
options, TRUE, cb);
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
if (escape == 0) /* Escaped single char */
|
if (escape == 0) /* Escaped single char */
|
||||||
{
|
{
|
||||||
|
@ -4405,7 +4431,8 @@ for (;; ptr++)
|
||||||
if (d == CHAR_BACKSLASH)
|
if (d == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
int descape;
|
int descape;
|
||||||
descape = check_escape(&ptr, &d, errorcodeptr, options, TRUE, cb);
|
descape = PRIV(check_escape)(&ptr, cb->end_pattern, &d,
|
||||||
|
errorcodeptr, options, TRUE, cb);
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
#ifdef EBCDIC
|
#ifdef EBCDIC
|
||||||
range_is_literal = FALSE;
|
range_is_literal = FALSE;
|
||||||
|
@ -6862,7 +6889,8 @@ for (;; ptr++)
|
||||||
|
|
||||||
case CHAR_BACKSLASH:
|
case CHAR_BACKSLASH:
|
||||||
tempptr = ptr;
|
tempptr = ptr;
|
||||||
escape = check_escape(&ptr, &ec, errorcodeptr, options, FALSE, cb);
|
escape = PRIV(check_escape)(&ptr, cb->end_pattern, &ec, errorcodeptr,
|
||||||
|
options, FALSE, cb);
|
||||||
if (*errorcodeptr != 0) goto FAILED;
|
if (*errorcodeptr != 0) goto FAILED;
|
||||||
|
|
||||||
if (escape == 0) /* The escape coded a single character */
|
if (escape == 0) /* The escape coded a single character */
|
||||||
|
|
|
@ -238,9 +238,12 @@ static const char match_error_texts[] =
|
||||||
"nested recursion at the same subject position\0"
|
"nested recursion at the same subject position\0"
|
||||||
"recursion limit exceeded\0"
|
"recursion limit exceeded\0"
|
||||||
"requested value is not available\0"
|
"requested value is not available\0"
|
||||||
/* 55 */
|
/* 55 */
|
||||||
"requested value is not set\0"
|
"requested value is not set\0"
|
||||||
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
"offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
|
||||||
|
"bad escape sequence in replacement string\0"
|
||||||
|
"expected closing curly bracket in replacement string\0"
|
||||||
|
"bad substitution in replacement string\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1886,6 +1886,7 @@ not referenced from pcre2test, and must not be defined when no code unit width
|
||||||
is available. */
|
is available. */
|
||||||
|
|
||||||
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
#define _pcre2_auto_possessify PCRE2_SUFFIX(_pcre2_auto_possessify_)
|
||||||
|
#define _pcre2_check_escape PCRE2_SUFFIX(_pcre2_check_escape_)
|
||||||
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
#define _pcre2_find_bracket PCRE2_SUFFIX(_pcre2_find_bracket_)
|
||||||
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
#define _pcre2_is_newline PCRE2_SUFFIX(_pcre2_is_newline_)
|
||||||
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
#define _pcre2_jit_free_rodata PCRE2_SUFFIX(_pcre2_jit_free_rodata_)
|
||||||
|
@ -1907,6 +1908,8 @@ is available. */
|
||||||
|
|
||||||
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
extern int _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||||
const compile_block *);
|
const compile_block *);
|
||||||
|
extern int _pcre2_check_escape(PCRE2_SPTR *, PCRE2_SPTR, uint32_t *,
|
||||||
|
int *, uint32_t, BOOL, compile_block *);
|
||||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
uint32_t *, BOOL);
|
uint32_t *, BOOL);
|
||||||
|
|
|
@ -45,6 +45,115 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
|
|
||||||
|
#define PTR_STACK_SIZE 20
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Find end of substitute text *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* In extended mode, we recognize ${name:+set text:unset text} and similar
|
||||||
|
constructions. This requires the identification of unescaped : and }
|
||||||
|
characters. This function scans for such. It must deal with nested ${
|
||||||
|
constructions. The pointer to the text is updated, either to the required end
|
||||||
|
character, or to where an error was detected.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
code points to the compiled expression (for options)
|
||||||
|
ptrptr points to the pointer to the start of the text (updated)
|
||||||
|
ptrend end of the whole string
|
||||||
|
last TRUE if the last expected string (only } recognized)
|
||||||
|
|
||||||
|
Returns: 0 on success
|
||||||
|
negative error code on failure
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
|
||||||
|
BOOL last)
|
||||||
|
{
|
||||||
|
int rc = 0;
|
||||||
|
uint32_t nestlevel = 0;
|
||||||
|
BOOL literal = FALSE;
|
||||||
|
PCRE2_SPTR ptr = *ptrptr;
|
||||||
|
|
||||||
|
for (; ptr < ptrend; ptr++)
|
||||||
|
{
|
||||||
|
if (literal)
|
||||||
|
{
|
||||||
|
if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
|
||||||
|
{
|
||||||
|
literal = FALSE;
|
||||||
|
ptr += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
|
||||||
|
{
|
||||||
|
if (nestlevel == 0) goto EXIT;
|
||||||
|
nestlevel--;
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
|
||||||
|
|
||||||
|
else if (*ptr == CHAR_DOLLAR_SIGN)
|
||||||
|
{
|
||||||
|
if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
|
||||||
|
{
|
||||||
|
nestlevel++;
|
||||||
|
ptr += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else if (*ptr == CHAR_BACKSLASH)
|
||||||
|
{
|
||||||
|
int erc;
|
||||||
|
int errorcode = 0;
|
||||||
|
uint32_t ch;
|
||||||
|
|
||||||
|
if (ptr < ptrend - 1) switch (ptr[1])
|
||||||
|
{
|
||||||
|
case CHAR_L:
|
||||||
|
case CHAR_l:
|
||||||
|
case CHAR_U:
|
||||||
|
case CHAR_u:
|
||||||
|
ptr += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||||||
|
code->overall_options, FALSE, NULL);
|
||||||
|
if (errorcode != 0)
|
||||||
|
{
|
||||||
|
rc = errorcode;
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(erc)
|
||||||
|
{
|
||||||
|
case 0: /* Data character */
|
||||||
|
case ESC_E: /* Isolated \E is ignored */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ESC_Q:
|
||||||
|
literal = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = PCRE2_ERROR_REPMISSINGBRACE; /* Terminator not found */
|
||||||
|
|
||||||
|
EXIT:
|
||||||
|
*ptrptr = ptr;
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Match and substitute *
|
* Match and substitute *
|
||||||
|
@ -80,13 +189,23 @@ pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
int subs;
|
int subs;
|
||||||
|
int forcecase = 0;
|
||||||
|
int forcecasereset = 0;
|
||||||
uint32_t ovector_count;
|
uint32_t ovector_count;
|
||||||
uint32_t goptions = 0;
|
uint32_t goptions = 0;
|
||||||
BOOL match_data_created = FALSE;
|
BOOL match_data_created = FALSE;
|
||||||
BOOL global = FALSE;
|
BOOL global = FALSE;
|
||||||
PCRE2_SIZE buff_offset, lengthleft, fraglength;
|
BOOL extended = FALSE;
|
||||||
|
BOOL literal = FALSE;
|
||||||
|
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||||
|
PCRE2_SPTR ptr;
|
||||||
|
PCRE2_SPTR repend;
|
||||||
|
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||||
PCRE2_SIZE *ovector;
|
PCRE2_SIZE *ovector;
|
||||||
|
|
||||||
|
buff_length = *blength;
|
||||||
|
*blength = PCRE2_UNSET;
|
||||||
|
|
||||||
/* Partial matching is not valid. */
|
/* Partial matching is not valid. */
|
||||||
|
|
||||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||||
|
@ -109,8 +228,7 @@ ovector_count = pcre2_get_ovector_count(match_data);
|
||||||
/* Check UTF replacement string if necessary. */
|
/* Check UTF replacement string if necessary. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((code->overall_options & PCRE2_UTF) != 0 &&
|
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
(options & PCRE2_NO_UTF_CHECK) == 0)
|
|
||||||
{
|
{
|
||||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
|
@ -121,8 +239,8 @@ if ((code->overall_options & PCRE2_UTF) != 0 &&
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Notice the global option and remove it from the options that are passed to
|
/* Notice the global and extended options and remove them from the options that
|
||||||
pcre2_match(). */
|
are passed to pcre2_match(). */
|
||||||
|
|
||||||
if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0)
|
if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0)
|
||||||
{
|
{
|
||||||
|
@ -130,24 +248,32 @@ if ((options & PCRE2_SUBSTITUTE_GLOBAL) != 0)
|
||||||
global = TRUE;
|
global = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find lengths of zero-terminated strings. */
|
if ((options & PCRE2_SUBSTITUTE_EXTENDED) != 0)
|
||||||
|
{
|
||||||
|
options &= ~PCRE2_SUBSTITUTE_EXTENDED;
|
||||||
|
extended = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find lengths of zero-terminated strings and the end of the replacement. */
|
||||||
|
|
||||||
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||||
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
|
||||||
|
repend = replacement + rlength;
|
||||||
|
|
||||||
/* Copy up to the start offset */
|
/* Copy up to the start offset */
|
||||||
|
|
||||||
if (start_offset > *blength) goto NOROOM;
|
if (start_offset > buff_length) goto NOROOM;
|
||||||
memcpy(buffer, subject, start_offset * (PCRE2_CODE_UNIT_WIDTH/8));
|
memcpy(buffer, subject, start_offset * (PCRE2_CODE_UNIT_WIDTH/8));
|
||||||
buff_offset = start_offset;
|
buff_offset = start_offset;
|
||||||
lengthleft = *blength - start_offset;
|
lengthleft = buff_length - start_offset;
|
||||||
|
|
||||||
/* Loop for global substituting. */
|
/* Loop for global substituting. */
|
||||||
|
|
||||||
subs = 0;
|
subs = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
PCRE2_SIZE i;
|
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||||
|
uint32_t ptrstackptr = 0;
|
||||||
|
|
||||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||||
match_data, mcontext);
|
match_data, mcontext);
|
||||||
|
@ -199,19 +325,56 @@ do
|
||||||
buff_offset += fraglength;
|
buff_offset += fraglength;
|
||||||
lengthleft -= fraglength;
|
lengthleft -= fraglength;
|
||||||
|
|
||||||
for (i = 0; i < rlength; i++)
|
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||||
|
extended mode when backslashes are being interpreted. In extended mode we
|
||||||
|
must handle nested substrings that are to be reprocessed. */
|
||||||
|
|
||||||
|
ptr = replacement;
|
||||||
|
for (;;)
|
||||||
{
|
{
|
||||||
if (replacement[i] == CHAR_DOLLAR_SIGN)
|
uint32_t ch;
|
||||||
|
|
||||||
|
/* If at the end of a nested substring, pop the stack. */
|
||||||
|
|
||||||
|
if (ptr >= repend)
|
||||||
|
{
|
||||||
|
if (ptrstackptr <= 0) break;
|
||||||
|
repend = ptrstack[--ptrstackptr];
|
||||||
|
ptr = ptrstack[--ptrstackptr];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle the next character */
|
||||||
|
|
||||||
|
if (literal)
|
||||||
|
{
|
||||||
|
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||||
|
{
|
||||||
|
literal = FALSE;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
goto LOADLITERAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not in literal mode. */
|
||||||
|
|
||||||
|
if (*ptr == CHAR_DOLLAR_SIGN)
|
||||||
{
|
{
|
||||||
int group, n;
|
int group, n;
|
||||||
|
uint32_t special = 0;
|
||||||
BOOL inparens;
|
BOOL inparens;
|
||||||
BOOL star;
|
BOOL star;
|
||||||
PCRE2_SIZE sublength;
|
PCRE2_SIZE sublength;
|
||||||
|
PCRE2_SPTR text1_start = NULL;
|
||||||
|
PCRE2_SPTR text1_end = NULL;
|
||||||
|
PCRE2_SPTR text2_start = NULL;
|
||||||
|
PCRE2_SPTR text2_end = NULL;
|
||||||
PCRE2_UCHAR next;
|
PCRE2_UCHAR next;
|
||||||
PCRE2_UCHAR name[33];
|
PCRE2_UCHAR name[33];
|
||||||
|
|
||||||
if (++i == rlength) goto BAD;
|
if (++ptr >= repend) goto BAD;
|
||||||
if ((next = replacement[i]) == CHAR_DOLLAR_SIGN) goto LITERAL;
|
if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
|
||||||
|
|
||||||
group = -1;
|
group = -1;
|
||||||
n = 0;
|
n = 0;
|
||||||
|
@ -220,24 +383,24 @@ do
|
||||||
|
|
||||||
if (next == CHAR_LEFT_CURLY_BRACKET)
|
if (next == CHAR_LEFT_CURLY_BRACKET)
|
||||||
{
|
{
|
||||||
if (++i == rlength) goto BAD;
|
if (++ptr >= repend) goto BAD;
|
||||||
next = replacement[i];
|
next = *ptr;
|
||||||
inparens = TRUE;
|
inparens = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (next == CHAR_ASTERISK)
|
if (next == CHAR_ASTERISK)
|
||||||
{
|
{
|
||||||
if (++i == rlength) goto BAD;
|
if (++ptr >= repend) goto BAD;
|
||||||
next = replacement[i];
|
next = *ptr;
|
||||||
star = TRUE;
|
star = TRUE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!star && next >= CHAR_0 && next <= CHAR_9)
|
if (!star && next >= CHAR_0 && next <= CHAR_9)
|
||||||
{
|
{
|
||||||
group = next - CHAR_0;
|
group = next - CHAR_0;
|
||||||
while (++i < rlength)
|
while (++ptr < repend)
|
||||||
{
|
{
|
||||||
next = replacement[i];
|
next = *ptr;
|
||||||
if (next < CHAR_0 || next > CHAR_9) break;
|
if (next < CHAR_0 || next > CHAR_9) break;
|
||||||
group = group * 10 + next - CHAR_0;
|
group = group * 10 + next - CHAR_0;
|
||||||
}
|
}
|
||||||
|
@ -249,18 +412,53 @@ do
|
||||||
{
|
{
|
||||||
name[n++] = next;
|
name[n++] = next;
|
||||||
if (n > 32) goto BAD;
|
if (n > 32) goto BAD;
|
||||||
if (i == rlength) break;
|
if (ptr >= repend) break;
|
||||||
next = replacement[++i];
|
next = *(++ptr);
|
||||||
}
|
}
|
||||||
if (n == 0) goto BAD;
|
if (n == 0) goto BAD;
|
||||||
name[n] = 0;
|
name[n] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* In extended mode we recognize ${name:+set text:unset text} and
|
||||||
|
${name:-default text}. */
|
||||||
|
|
||||||
if (inparens)
|
if (inparens)
|
||||||
{
|
{
|
||||||
if (i == rlength || next != CHAR_RIGHT_CURLY_BRACKET) goto BAD;
|
|
||||||
|
if (extended && !star && ptr < repend - 2 && next == CHAR_COLON)
|
||||||
|
{
|
||||||
|
special = *(++ptr);
|
||||||
|
if (special != CHAR_PLUS && special != CHAR_MINUS)
|
||||||
|
{
|
||||||
|
rc = PCRE2_ERROR_BADSUBSTITUTION;
|
||||||
|
goto PTREXIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
text1_start = ++ptr;
|
||||||
|
rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
|
||||||
|
if (rc != 0) goto PTREXIT;
|
||||||
|
text1_end = ptr;
|
||||||
|
|
||||||
|
if (special == CHAR_PLUS && *ptr == CHAR_COLON)
|
||||||
|
{
|
||||||
|
text2_start = ++ptr;
|
||||||
|
rc = find_text_end(code, &ptr, repend, TRUE);
|
||||||
|
if (rc != 0) goto PTREXIT;
|
||||||
|
text2_end = ptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
|
||||||
|
{
|
||||||
|
rc = PCRE2_ERROR_REPMISSINGBRACE;
|
||||||
|
goto PTREXIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ptr++;
|
||||||
}
|
}
|
||||||
else i--; /* Last code unit of name/number */
|
|
||||||
|
|
||||||
/* Have found a syntactically correct group number or name, or
|
/* Have found a syntactically correct group number or name, or
|
||||||
*name. Only *MARK is currently recognized. */
|
*name. Only *MARK is currently recognized. */
|
||||||
|
@ -282,31 +480,242 @@ do
|
||||||
else goto BAD;
|
else goto BAD;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Substitute the contents of a group. */
|
/* Substitute the contents of a group. We don't use substring_copy
|
||||||
|
functions any more, in order to support case forcing. */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
sublength = lengthleft;
|
PCRE2_SPTR subptr, subptrend;
|
||||||
if (group < 0)
|
|
||||||
rc = pcre2_substring_copy_byname(match_data, name,
|
/* Find a number for a named group. In case there are duplicate names,
|
||||||
buffer + buff_offset, &sublength);
|
search for the first one that is set. */
|
||||||
else
|
|
||||||
rc = pcre2_substring_copy_bynumber(match_data, group,
|
|
||||||
buffer + buff_offset, &sublength);
|
|
||||||
if (rc < 0) goto EXIT;
|
|
||||||
|
|
||||||
buff_offset += sublength;
|
if (group < 0)
|
||||||
lengthleft -= sublength;
|
{
|
||||||
|
PCRE2_SPTR first, last, entry;
|
||||||
|
rc = pcre2_substring_nametable_scan(code, name, &first, &last);
|
||||||
|
if (rc < 0) goto PTREXIT;
|
||||||
|
for (entry = first; entry <= last; entry += rc)
|
||||||
|
{
|
||||||
|
uint32_t ng = GET2(entry, 0);
|
||||||
|
if (ng < ovector_count)
|
||||||
|
{
|
||||||
|
if (group < 0) group = ng; /* First in ovector */
|
||||||
|
if (ovector[ng*2] != PCRE2_UNSET)
|
||||||
|
{
|
||||||
|
group = ng; /* First that is set */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If group is still negative, it means we did not find a group that
|
||||||
|
is in the ovector. Just set the first group. */
|
||||||
|
|
||||||
|
if (group < 0) group = GET2(first, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
|
||||||
|
if (rc < 0 && (special == 0 || rc != PCRE2_ERROR_UNSET)) goto PTREXIT;
|
||||||
|
|
||||||
|
/* If special is '+' we have a 'set' and possibly an 'unset' text,
|
||||||
|
both of which are reprocessed when used. If special is '-' we have a
|
||||||
|
default text for when the group is unset; it must be reprocessed. */
|
||||||
|
|
||||||
|
if (special != 0)
|
||||||
|
{
|
||||||
|
if (special == CHAR_MINUS)
|
||||||
|
{
|
||||||
|
if (rc == 0) goto LITERAL_SUBSTITUTE;
|
||||||
|
text2_start = text1_start;
|
||||||
|
text2_end = text1_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
|
||||||
|
ptrstack[ptrstackptr++] = ptr;
|
||||||
|
ptrstack[ptrstackptr++] = repend;
|
||||||
|
|
||||||
|
if (rc == 0)
|
||||||
|
{
|
||||||
|
ptr = text1_start;
|
||||||
|
repend = text1_end;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ptr = text2_start;
|
||||||
|
repend = text2_end;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Otherwise we have a literal substitution of a group's contents. */
|
||||||
|
|
||||||
|
LITERAL_SUBSTITUTE:
|
||||||
|
subptr = subject + ovector[group*2];
|
||||||
|
subptrend = subject + ovector[group*2 + 1];
|
||||||
|
|
||||||
|
/* Substitute a literal string, possibly forcing alphabetic case. */
|
||||||
|
|
||||||
|
while (subptr < subptrend)
|
||||||
|
{
|
||||||
|
GETCHARINCTEST(ch, subptr);
|
||||||
|
if (forcecase != 0)
|
||||||
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
uint32_t type = UCD_CHARTYPE(ch);
|
||||||
|
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||||
|
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||||
|
ch = UCD_OTHERCASE(ch);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (((code->tables + cbits_offset +
|
||||||
|
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||||
|
)[ch/8] & (1 << (ch%8))) == 0)
|
||||||
|
ch = (code->tables + fcc_offset)[ch];
|
||||||
|
}
|
||||||
|
forcecase = forcecasereset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
unsigned int chlen;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (lengthleft < 6) goto NOROOM;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (lengthleft < 2) goto NOROOM;
|
||||||
|
#else
|
||||||
|
if (lengthleft < 1) goto NOROOM;
|
||||||
|
#endif
|
||||||
|
chlen = PRIV(ord2utf)(ch, buffer + buff_offset);
|
||||||
|
buff_offset += chlen;
|
||||||
|
lengthleft -= chlen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (lengthleft-- < 1) goto NOROOM;
|
||||||
|
buffer[buff_offset++] = ch;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle a literal code unit */
|
/* Handle an escape sequence in extended mode. We can use check_escape()
|
||||||
|
to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
|
||||||
|
the case-forcing escapes are not supported in pcre2_compile() so must be
|
||||||
|
recognized here. */
|
||||||
|
|
||||||
else
|
else if (extended && *ptr == CHAR_BACKSLASH)
|
||||||
{
|
{
|
||||||
|
int errorcode = 0;
|
||||||
|
|
||||||
|
if (ptr < repend - 1) switch (ptr[1])
|
||||||
|
{
|
||||||
|
case CHAR_L:
|
||||||
|
forcecase = forcecasereset = -1;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case CHAR_l:
|
||||||
|
forcecase = -1;
|
||||||
|
forcecasereset = 0;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case CHAR_U:
|
||||||
|
forcecase = forcecasereset = 1;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case CHAR_u:
|
||||||
|
forcecase = 1;
|
||||||
|
forcecasereset = 0;
|
||||||
|
ptr += 2;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||||||
|
code->overall_options, FALSE, NULL);
|
||||||
|
if (errorcode != 0) goto BADESCAPE;
|
||||||
|
ptr++;
|
||||||
|
|
||||||
|
switch(rc)
|
||||||
|
{
|
||||||
|
case ESC_E:
|
||||||
|
forcecase = forcecasereset = 0;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case ESC_Q:
|
||||||
|
literal = TRUE;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
case 0: /* Data character */
|
||||||
|
goto LITERAL;
|
||||||
|
|
||||||
|
default:
|
||||||
|
goto BADESCAPE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle a literal code unit */
|
||||||
|
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LOADLITERAL:
|
||||||
|
GETCHARINCTEST(ch, ptr); /* Get character value, increment pointer */
|
||||||
|
|
||||||
LITERAL:
|
LITERAL:
|
||||||
if (lengthleft-- < 1) goto NOROOM;
|
if (forcecase != 0)
|
||||||
buffer[buff_offset++] = replacement[i];
|
{
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
uint32_t type = UCD_CHARTYPE(ch);
|
||||||
|
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||||
|
type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
|
||||||
|
ch = UCD_OTHERCASE(ch);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (((code->tables + cbits_offset +
|
||||||
|
((forcecase > 0)? cbit_upper:cbit_lower)
|
||||||
|
)[ch/8] & (1 << (ch%8))) == 0)
|
||||||
|
ch = (code->tables + fcc_offset)[ch];
|
||||||
|
}
|
||||||
|
|
||||||
|
forcecase = forcecasereset;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf)
|
||||||
|
{
|
||||||
|
unsigned int chlen;
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
|
if (lengthleft < 6) goto NOROOM;
|
||||||
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if (lengthleft < 2) goto NOROOM;
|
||||||
|
#else
|
||||||
|
if (lengthleft < 1) goto NOROOM;
|
||||||
|
#endif
|
||||||
|
chlen = PRIV(ord2utf)(ch, buffer + buff_offset);
|
||||||
|
buff_offset += chlen;
|
||||||
|
lengthleft -= chlen;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if (lengthleft-- < 1) goto NOROOM;
|
||||||
|
buffer[buff_offset++] = ch;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -341,6 +750,13 @@ goto EXIT;
|
||||||
|
|
||||||
BAD:
|
BAD:
|
||||||
rc = PCRE2_ERROR_BADREPLACEMENT;
|
rc = PCRE2_ERROR_BADREPLACEMENT;
|
||||||
|
goto PTREXIT;
|
||||||
|
|
||||||
|
BADESCAPE:
|
||||||
|
rc = PCRE2_ERROR_BADREPESCAPE;
|
||||||
|
|
||||||
|
PTREXIT:
|
||||||
|
*blength = (PCRE2_SIZE)(ptr - replacement);
|
||||||
goto EXIT;
|
goto EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -182,13 +182,13 @@ void vms_setsymbol( char *, char *, int );
|
||||||
#define LOCALESIZE 32 /* Size of locale name */
|
#define LOCALESIZE 32 /* Size of locale name */
|
||||||
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||||
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
|
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
|
||||||
#define REPLACE_MODSIZE 96 /* Field for reading 8-bit replacement */
|
#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
|
||||||
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||||
|
|
||||||
/* Make sure the buffer into which replacement strings are copied is big enough
|
/* Make sure the buffer into which replacement strings are copied is big enough
|
||||||
to hold them as 32-bit code units. */
|
to hold them as 32-bit code units. */
|
||||||
|
|
||||||
#define REPLACE_BUFFSIZE (4*REPLACE_MODSIZE)
|
#define REPLACE_BUFFSIZE 1024 /* This is a byte value */
|
||||||
|
|
||||||
/* Execution modes */
|
/* Execution modes */
|
||||||
|
|
||||||
|
@ -385,31 +385,32 @@ enum { MOD_CTC, /* Applies to a compile context */
|
||||||
/* Control bits. Some apply to compiling, some to matching, but some can be set
|
/* Control bits. Some apply to compiling, some to matching, but some can be set
|
||||||
either on a pattern or a data line, so they must all be distinct. */
|
either on a pattern or a data line, so they must all be distinct. */
|
||||||
|
|
||||||
#define CTL_AFTERTEXT 0x00000001u
|
#define CTL_AFTERTEXT 0x00000001u
|
||||||
#define CTL_ALLAFTERTEXT 0x00000002u
|
#define CTL_ALLAFTERTEXT 0x00000002u
|
||||||
#define CTL_ALLCAPTURES 0x00000004u
|
#define CTL_ALLCAPTURES 0x00000004u
|
||||||
#define CTL_ALLUSEDTEXT 0x00000008u
|
#define CTL_ALLUSEDTEXT 0x00000008u
|
||||||
#define CTL_ALTGLOBAL 0x00000010u
|
#define CTL_ALTGLOBAL 0x00000010u
|
||||||
#define CTL_BINCODE 0x00000020u
|
#define CTL_BINCODE 0x00000020u
|
||||||
#define CTL_CALLOUT_CAPTURE 0x00000040u
|
#define CTL_CALLOUT_CAPTURE 0x00000040u
|
||||||
#define CTL_CALLOUT_INFO 0x00000080u
|
#define CTL_CALLOUT_INFO 0x00000080u
|
||||||
#define CTL_CALLOUT_NONE 0x00000100u
|
#define CTL_CALLOUT_NONE 0x00000100u
|
||||||
#define CTL_DFA 0x00000200u
|
#define CTL_DFA 0x00000200u
|
||||||
#define CTL_FINDLIMITS 0x00000400u
|
#define CTL_FINDLIMITS 0x00000400u
|
||||||
#define CTL_FULLBINCODE 0x00000800u
|
#define CTL_FULLBINCODE 0x00000800u
|
||||||
#define CTL_GETALL 0x00001000u
|
#define CTL_GETALL 0x00001000u
|
||||||
#define CTL_GLOBAL 0x00002000u
|
#define CTL_GLOBAL 0x00002000u
|
||||||
#define CTL_HEXPAT 0x00004000u
|
#define CTL_HEXPAT 0x00004000u
|
||||||
#define CTL_INFO 0x00008000u
|
#define CTL_INFO 0x00008000u
|
||||||
#define CTL_JITFAST 0x00010000u
|
#define CTL_JITFAST 0x00010000u
|
||||||
#define CTL_JITVERIFY 0x00020000u
|
#define CTL_JITVERIFY 0x00020000u
|
||||||
#define CTL_MARK 0x00040000u
|
#define CTL_MARK 0x00040000u
|
||||||
#define CTL_MEMORY 0x00080000u
|
#define CTL_MEMORY 0x00080000u
|
||||||
#define CTL_NULLCONTEXT 0x00100000u
|
#define CTL_NULLCONTEXT 0x00100000u
|
||||||
#define CTL_POSIX 0x00200000u
|
#define CTL_POSIX 0x00200000u
|
||||||
#define CTL_PUSH 0x00400000u
|
#define CTL_PUSH 0x00400000u
|
||||||
#define CTL_STARTCHAR 0x00800000u
|
#define CTL_STARTCHAR 0x00800000u
|
||||||
#define CTL_ZERO_TERMINATE 0x01000000u
|
#define CTL_SUBSTITUTE_EXTENDED 0x01000000u
|
||||||
|
#define CTL_ZERO_TERMINATE 0x02000000u
|
||||||
|
|
||||||
#define CTL_BSR_SET 0x80000000u /* This is informational */
|
#define CTL_BSR_SET 0x80000000u /* This is informational */
|
||||||
#define CTL_NL_SET 0x40000000u /* This is informational */
|
#define CTL_NL_SET 0x40000000u /* This is informational */
|
||||||
|
@ -566,6 +567,7 @@ static modstruct modlist[] = {
|
||||||
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
|
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
|
||||||
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
|
{ "stackguard", MOD_PAT, MOD_INT, 0, PO(stackguard_test) },
|
||||||
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
|
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
|
||||||
|
{ "substitute_extended", MOD_PAT, MOD_CTL, CTL_SUBSTITUTE_EXTENDED, PO(control) },
|
||||||
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
{ "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) },
|
||||||
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
|
{ "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) },
|
||||||
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
{ "ungreedy", MOD_PAT, MOD_OPT, PCRE2_UNGREEDY, PO(options) },
|
||||||
|
@ -3453,7 +3455,7 @@ Returns: nothing
|
||||||
static void
|
static void
|
||||||
show_controls(uint32_t controls, const char *before)
|
show_controls(uint32_t controls, const char *before)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||||
|
@ -3481,6 +3483,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
((controls & CTL_POSIX) != 0)? " posix" : "",
|
((controls & CTL_POSIX) != 0)? " posix" : "",
|
||||||
((controls & CTL_PUSH) != 0)? " push" : "",
|
((controls & CTL_PUSH) != 0)? " push" : "",
|
||||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||||
|
((controls & CTL_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||||
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
|
((controls & CTL_ZERO_TERMINATE) != 0)? " zero_terminate" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5685,7 +5688,7 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
uint8_t *pr;
|
uint8_t *pr;
|
||||||
uint8_t rbuffer[REPLACE_BUFFSIZE];
|
uint8_t rbuffer[REPLACE_BUFFSIZE];
|
||||||
uint8_t nbuffer[REPLACE_BUFFSIZE];
|
uint8_t nbuffer[REPLACE_BUFFSIZE];
|
||||||
uint32_t goption;
|
uint32_t xoptions;
|
||||||
PCRE2_SIZE rlen, nsize, erroroffset;
|
PCRE2_SIZE rlen, nsize, erroroffset;
|
||||||
BOOL badutf = FALSE;
|
BOOL badutf = FALSE;
|
||||||
|
|
||||||
|
@ -5702,8 +5705,11 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
if (timeitm)
|
if (timeitm)
|
||||||
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
|
fprintf(outfile, "** Timing is not supported with replace: ignored\n");
|
||||||
|
|
||||||
goption = ((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
|
xoptions = (((dat_datctl.control & CTL_GLOBAL) == 0)? 0 :
|
||||||
PCRE2_SUBSTITUTE_GLOBAL;
|
PCRE2_SUBSTITUTE_GLOBAL) |
|
||||||
|
(((pat_patctl.control & CTL_SUBSTITUTE_EXTENDED) == 0)? 0 :
|
||||||
|
PCRE2_SUBSTITUTE_EXTENDED);
|
||||||
|
|
||||||
SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
|
SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
|
||||||
pr = dat_datctl.replacement;
|
pr = dat_datctl.replacement;
|
||||||
|
|
||||||
|
@ -5790,12 +5796,15 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
else
|
else
|
||||||
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
|
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
|
||||||
PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_SUBSTITUTE(rc, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options|goption, match_data, dat_context,
|
dat_datctl.options|xoptions, match_data, dat_context,
|
||||||
rbuffer, rlen, nbuffer, &nsize);
|
rbuffer, rlen, nbuffer, &nsize);
|
||||||
|
|
||||||
if (rc < 0)
|
if (rc < 0)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "Failed: error %d: ", rc);
|
fprintf(outfile, "Failed: error %d", rc);
|
||||||
|
if (nsize != PCRE2_UNSET)
|
||||||
|
fprintf(outfile, " at offset %ld in replacement", nsize);
|
||||||
|
fprintf(outfile, ": ");
|
||||||
PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
|
PCRE2_GET_ERROR_MESSAGE(nsize, rc, pbuffer);
|
||||||
PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
|
PCHARSV(CASTVAR(void *, pbuffer), 0, nsize, FALSE, outfile);
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,4 +92,6 @@
|
||||||
|
|
||||||
"(?(?C)"
|
"(?(?C)"
|
||||||
|
|
||||||
|
/abcd/substitute_extended
|
||||||
|
|
||||||
# End of testdata/testinput18
|
# End of testdata/testinput18
|
||||||
|
|
|
@ -4539,4 +4539,55 @@ B)x/alt_verbnames,mark
|
||||||
abcd\=null_context,find_limits
|
abcd\=null_context,find_limits
|
||||||
abcd\=allusedtext,startchar
|
abcd\=allusedtext,startchar
|
||||||
|
|
||||||
|
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
|
||||||
|
abcDE
|
||||||
|
|
||||||
|
/abcd/replace=xy\kz,substitute_extended
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2}
|
||||||
|
ab
|
||||||
|
ac
|
||||||
|
ab\=replace=${1:+$1\:$1:$2}
|
||||||
|
ac\=replace=${1:+$1\:$1:$2}
|
||||||
|
|
||||||
|
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
|
||||||
|
ab
|
||||||
|
ac
|
||||||
|
|
||||||
|
/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}<
|
||||||
|
a
|
||||||
|
|
||||||
|
/X(b)Y/substitute_extended
|
||||||
|
XbY\=replace=x${1:+$1\U$1}y
|
||||||
|
XbY\=replace=\Ux${1:+$1$1}y
|
||||||
|
|
||||||
|
/a/substitute_extended,replace=${*MARK:+a:b}
|
||||||
|
a
|
||||||
|
|
||||||
|
/(abcd)/replace=${1:+xy\kz},substitute_extended
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/abcd/substitute_extended,replace=>$1<
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/abcd/substitute_extended,replace=>xxx${xyz}<<<
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/(?J)(?:(?<A>a)|(?<A>b))/replace=<$A>
|
||||||
|
[a]
|
||||||
|
[b]
|
||||||
|
\= Expect error
|
||||||
|
(a)\=ovector=1
|
||||||
|
|
||||||
|
/(a)|(b)/replace=<$1>
|
||||||
|
\= Expect error
|
||||||
|
b
|
||||||
|
|
||||||
|
/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1
|
||||||
|
aaBB
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1678,9 +1678,16 @@
|
||||||
/[\pS#moq]/
|
/[\pS#moq]/
|
||||||
=
|
=
|
||||||
|
|
||||||
# UTF tests
|
|
||||||
|
|
||||||
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
||||||
cxxxz
|
cxxxz
|
||||||
|
|
||||||
|
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
|
||||||
|
abcd
|
||||||
|
|
||||||
|
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
|
||||||
|
a\x{e0}\x{101}\x{c0}\x{102}
|
||||||
|
|
||||||
|
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
|
||||||
|
ab12cde
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
|
@ -135,9 +135,12 @@ No match: POSIX code 17: match failed
|
||||||
0+ issippi
|
0+ issippi
|
||||||
|
|
||||||
/abc/\
|
/abc/\
|
||||||
Failed: POSIX code 9: bad escape sequence at offset 4
|
Failed: POSIX code 9: bad escape sequence at offset 3
|
||||||
|
|
||||||
"(?(?C)"
|
"(?(?C)"
|
||||||
Failed: POSIX code 3: pattern error at offset 2
|
Failed: POSIX code 3: pattern error at offset 2
|
||||||
|
|
||||||
|
/abcd/substitute_extended
|
||||||
|
** Ignored with POSIX interface: substitute_extended
|
||||||
|
|
||||||
# End of testdata/testinput18
|
# End of testdata/testinput18
|
||||||
|
|
|
@ -946,10 +946,10 @@ Failed: error 125 at offset 6: lookbehind assertion is not fixed length
|
||||||
Failed: error 104 at offset 7: numbers out of order in {} quantifier
|
Failed: error 104 at offset 7: numbers out of order in {} quantifier
|
||||||
|
|
||||||
/abc/\
|
/abc/\
|
||||||
Failed: error 101 at offset 4: \ at end of pattern
|
Failed: error 101 at offset 3: \ at end of pattern
|
||||||
|
|
||||||
/abc/\i
|
/abc/\i
|
||||||
Failed: error 101 at offset 4: \ at end of pattern
|
Failed: error 101 at offset 3: \ at end of pattern
|
||||||
|
|
||||||
/(a)bc(d)/I
|
/(a)bc(d)/I
|
||||||
Capturing subpattern count = 2
|
Capturing subpattern count = 2
|
||||||
|
@ -13546,27 +13546,27 @@ Failed: error 119 at offset 3: parentheses are too deeply nested
|
||||||
|
|
||||||
/abc/replace=a$++
|
/abc/replace=a$++
|
||||||
123abc
|
123abc
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -35 at offset 2 in replacement: invalid replacement string
|
||||||
|
|
||||||
/abc/replace=a$bad
|
/abc/replace=a$bad
|
||||||
123abc
|
123abc
|
||||||
Failed: error -49: unknown substring
|
Failed: error -49 at offset 5 in replacement: unknown substring
|
||||||
|
|
||||||
/abc/replace=a${A234567890123456789_123456789012}z
|
/abc/replace=a${A234567890123456789_123456789012}z
|
||||||
123abc
|
123abc
|
||||||
Failed: error -49: unknown substring
|
Failed: error -49 at offset 36 in replacement: unknown substring
|
||||||
|
|
||||||
/abc/replace=a${A23456789012345678901234567890123}z
|
/abc/replace=a${A23456789012345678901234567890123}z
|
||||||
123abc
|
123abc
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -35 at offset 35 in replacement: invalid replacement string
|
||||||
|
|
||||||
/abc/replace=a${bcd
|
/abc/replace=a${bcd
|
||||||
123abc
|
123abc
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -58 at offset 6 in replacement: expected closing curly bracket in replacement string
|
||||||
|
|
||||||
/abc/replace=a${b+d}z
|
/abc/replace=a${b+d}z
|
||||||
123abc
|
123abc
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -58 at offset 4 in replacement: expected closing curly bracket in replacement string
|
||||||
|
|
||||||
/abc/replace=[10]XYZ
|
/abc/replace=[10]XYZ
|
||||||
123abc123
|
123abc123
|
||||||
|
@ -13632,19 +13632,19 @@ Failed: error -34: bad option value
|
||||||
|
|
||||||
/(*:pear)apple/g,replace=${*MARKING}
|
/(*:pear)apple/g,replace=${*MARKING}
|
||||||
apple lemon blackberry
|
apple lemon blackberry
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -35 at offset 11 in replacement: invalid replacement string
|
||||||
|
|
||||||
/(*:pear)apple/g,replace=${*MARK-time
|
/(*:pear)apple/g,replace=${*MARK-time
|
||||||
apple lemon blackberry
|
apple lemon blackberry
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string
|
||||||
|
|
||||||
/(*:pear)apple/g,replace=${*mark}
|
/(*:pear)apple/g,replace=${*mark}
|
||||||
apple lemon blackberry
|
apple lemon blackberry
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -35 at offset 8 in replacement: invalid replacement string
|
||||||
|
|
||||||
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
|
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=<$*MARKET>
|
||||||
apple lemon blackberry
|
apple lemon blackberry
|
||||||
Failed: error -35: invalid replacement string
|
Failed: error -35 at offset 9 in replacement: invalid replacement string
|
||||||
|
|
||||||
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
|
/(*:pear)apple|(*:orange)lemon|(*:strawberry)blackberry/g,replace=[22]${*MARK}
|
||||||
apple lemon blackberry
|
apple lemon blackberry
|
||||||
|
@ -14669,4 +14669,76 @@ Failed: error -56: offset limit set without PCRE2_USE_OFFSET_LIMIT
|
||||||
abcd\=allusedtext,startchar
|
abcd\=allusedtext,startchar
|
||||||
** Not allowed together: allusedtext startchar
|
** Not allowed together: allusedtext startchar
|
||||||
|
|
||||||
|
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended
|
||||||
|
abcd
|
||||||
|
1: w\x0dx\x82y\xdbz(12\$34$$\x345$)
|
||||||
|
|
||||||
|
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
|
||||||
|
abcDE
|
||||||
|
1: aBcBCbcdEdeabAByzDone
|
||||||
|
|
||||||
|
/abcd/replace=xy\kz,substitute_extended
|
||||||
|
abcd
|
||||||
|
Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string
|
||||||
|
|
||||||
|
/a(?:(b)|(c))/substitute_extended,replace=X${1:+1:-1}X${2:+2:-2}
|
||||||
|
ab
|
||||||
|
1: X1X-2
|
||||||
|
ac
|
||||||
|
1: X-1X2
|
||||||
|
ab\=replace=${1:+$1\:$1:$2}
|
||||||
|
1: b:b
|
||||||
|
ac\=replace=${1:+$1\:$1:$2}
|
||||||
|
1: c
|
||||||
|
|
||||||
|
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
|
||||||
|
ab
|
||||||
|
1: XbX2:-2
|
||||||
|
ac
|
||||||
|
1: X1:-1Xc
|
||||||
|
|
||||||
|
/(a)/substitute_extended,replace=>${1:+\Q$1:{}$$\E+\U$1}<
|
||||||
|
a
|
||||||
|
1: >$1:{}$$+A<
|
||||||
|
|
||||||
|
/X(b)Y/substitute_extended
|
||||||
|
XbY\=replace=x${1:+$1\U$1}y
|
||||||
|
1: xbBY
|
||||||
|
XbY\=replace=\Ux${1:+$1$1}y
|
||||||
|
1: XBBY
|
||||||
|
|
||||||
|
/a/substitute_extended,replace=${*MARK:+a:b}
|
||||||
|
a
|
||||||
|
Failed: error -58 at offset 7 in replacement: expected closing curly bracket in replacement string
|
||||||
|
|
||||||
|
/(abcd)/replace=${1:+xy\kz},substitute_extended
|
||||||
|
abcd
|
||||||
|
Failed: error -57 at offset 8 in replacement: bad escape sequence in replacement string
|
||||||
|
|
||||||
|
/abcd/substitute_extended,replace=>$1<
|
||||||
|
abcd
|
||||||
|
Failed: error -49 at offset 3 in replacement: unknown substring
|
||||||
|
|
||||||
|
/abcd/substitute_extended,replace=>xxx${xyz}<<<
|
||||||
|
abcd
|
||||||
|
Failed: error -49 at offset 10 in replacement: unknown substring
|
||||||
|
|
||||||
|
/(?J)(?:(?<A>a)|(?<A>b))/replace=<$A>
|
||||||
|
[a]
|
||||||
|
1: [<a>]
|
||||||
|
[b]
|
||||||
|
1: [<b>]
|
||||||
|
\= Expect error
|
||||||
|
(a)\=ovector=1
|
||||||
|
Failed: error -54 at offset 3 in replacement: requested value is not available
|
||||||
|
|
||||||
|
/(a)|(b)/replace=<$1>
|
||||||
|
\= Expect error
|
||||||
|
b
|
||||||
|
Failed: error -55 at offset 3 in replacement: requested value is not set
|
||||||
|
|
||||||
|
/(aa)(BB)/substitute_extended,replace=\U$1\L$2\E$1..\U$1\l$2$1
|
||||||
|
aaBB
|
||||||
|
1: AAbbaa..AAbBaa
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -4026,11 +4026,21 @@ No match
|
||||||
=
|
=
|
||||||
0: =
|
0: =
|
||||||
|
|
||||||
# UTF tests
|
|
||||||
|
|
||||||
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
|
||||||
cxxxz
|
cxxxz
|
||||||
0: xxx
|
0: xxx
|
||||||
MK: a\x{12345}b\x{09}(d)c
|
MK: a\x{12345}b\x{09}(d)c
|
||||||
|
|
||||||
|
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
|
||||||
|
abcd
|
||||||
|
1: x\x{824}y\x{6db}z(12\$34$$\x345$)
|
||||||
|
|
||||||
|
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
|
||||||
|
a\x{e0}\x{101}\x{c0}\x{102}
|
||||||
|
1: a\x{c0}\x{101}\x{c0}\x{100}\x{e0}\x{101}\x{e0}\x{102}\x{e0}\x{103}ab\x{c0}\x{100}\x{f0}\x{161}Done
|
||||||
|
|
||||||
|
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
|
||||||
|
ab12cde
|
||||||
|
7: <not digit; letter><not digit; letter><digit; not a letter><digit; not a letter><not digit; letter><not digit; letter><not digit; letter>
|
||||||
|
|
||||||
# End of testinput5
|
# End of testinput5
|
||||||
|
|
Loading…
Reference in New Issue