Implement PCRE2_SUBSTITUTE_LITERAL.
This commit is contained in:
parent
0a2033f0f7
commit
f3fd8b18cb
|
@ -22,6 +22,8 @@ when users are unaware of it, making some patterns appear not to be working as
|
|||
expected. Capture values of recursive back references in repeated groups are
|
||||
now correctly backtracked, so this unnecessary restriction has been removed.
|
||||
|
||||
5. Added PCRE2_SUBSTITUTE_LITERAL.
|
||||
|
||||
|
||||
Version 10.34 21-November-2019
|
||||
------------------------------
|
||||
|
|
|
@ -74,10 +74,15 @@ zero-terminated strings. The options are:
|
|||
PCRE2_UTF was set at compile time)
|
||||
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
|
||||
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||
PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
|
||||
</pre>
|
||||
PCRE2_SUBSTITUTE_LITERAL overrides PCRE2_SUBSTITUTE_EXTENDED,
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY.
|
||||
</P>
|
||||
<P>
|
||||
The function returns the number of substitutions, which may be zero if there
|
||||
were no matches. The result can be greater than one only when
|
||||
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||
|
|
|
@ -3349,11 +3349,12 @@ of giving an error return as soon as the buffer overflows. Note also that the
|
|||
length is in code units, not bytes.
|
||||
</P>
|
||||
<P>
|
||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||
dollar character is an escape character that can specify the insertion of
|
||||
characters from capture groups or names from (*MARK) or other control verbs
|
||||
in the pattern. The following forms are always recognized:
|
||||
The replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set. If the
|
||||
PCRE2_SUBSTITUTE_LITERAL option is set, it is not interpreted in any way. By
|
||||
default, however, a dollar character is an escape character that can specify
|
||||
the insertion of characters from capture groups or names from (*MARK) or other
|
||||
control verbs in the pattern. The following forms are always recognized:
|
||||
<pre>
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
|
@ -3381,6 +3382,12 @@ As well as the usual options for <b>pcre2_match()</b>, a number of additional
|
|||
options can be set in the <i>options</i> argument of <b>pcre2_substitute()</b>.
|
||||
</P>
|
||||
<P>
|
||||
As mentioned above, PCRE2_SUBSTITUTE_LITERAL causes the replacement string to
|
||||
be treated as a literal, with no interpretation. If this option is set,
|
||||
PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY are irrelevant and are ignored.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
|
||||
replacing every matching substring. If this option is not set, only the first
|
||||
matching substring is replaced. The search for matches takes place in the
|
||||
|
@ -3869,7 +3876,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 02 September 2019
|
||||
Last updated: 26 December 2019
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1063,8 +1063,9 @@ process.
|
|||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1233,8 +1234,9 @@ pattern.
|
|||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1413,6 +1415,7 @@ for <b>pcre2_substitute()</b>:
|
|||
<pre>
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -2093,7 +2096,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 30 July 2019
|
||||
Last updated: 26 December 2019
|
||||
<br>
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -3236,38 +3236,44 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
ing an error return as soon as the buffer overflows. Note also that the
|
||||
length is in code units, not bytes.
|
||||
|
||||
In the replacement string, which is interpreted as a UTF string in UTF
|
||||
mode, and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK op-
|
||||
tion is set, a dollar character is an escape character that can specify
|
||||
the insertion of characters from capture groups or names from (*MARK)
|
||||
or other control verbs in the pattern. The following forms are always
|
||||
recognized:
|
||||
The replacement string, which is interpreted as a UTF string in UTF
|
||||
mode, is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option
|
||||
is set. If the PCRE2_SUBSTITUTE_LITERAL option is set, it is not inter-
|
||||
preted in any way. By default, however, a dollar character is an escape
|
||||
character that can specify the insertion of characters from capture
|
||||
groups or names from (*MARK) or other control verbs in the pattern. The
|
||||
following forms are always recognized:
|
||||
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
$*MARK or ${*MARK} insert a control verb name
|
||||
|
||||
Either a group number or a group name can be given for <n>. Curly
|
||||
brackets are required only if the following character would be inter-
|
||||
Either a group number or a group name can be given for <n>. Curly
|
||||
brackets are required only if the following character would be inter-
|
||||
preted as part of the number or name. The number may be zero to include
|
||||
the entire matched string. For example, if the pattern a(b)c is
|
||||
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
||||
the entire matched string. For example, if the pattern a(b)c is
|
||||
matched with "=abc=" and the replacement string "+$1$0$1+", the result
|
||||
is "=+babcb+=".
|
||||
|
||||
$*MARK inserts the name from the last encountered backtracking control
|
||||
verb on the matching path that has a name. (*MARK) must always include
|
||||
a name, but the other verbs need not. For example, in the case of
|
||||
$*MARK inserts the name from the last encountered backtracking control
|
||||
verb on the matching path that has a name. (*MARK) must always include
|
||||
a name, but the other verbs need not. For example, in the case of
|
||||
(*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B)
|
||||
the relevant name is "B". This facility can be used to perform simple
|
||||
the relevant name is "B". This facility can be used to perform simple
|
||||
simultaneous substitutions, as this pcre2test example shows:
|
||||
|
||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||
apple lemon
|
||||
2: pear orange
|
||||
|
||||
As well as the usual options for pcre2_match(), a number of additional
|
||||
As well as the usual options for pcre2_match(), a number of additional
|
||||
options can be set in the options argument of pcre2_substitute().
|
||||
|
||||
As mentioned above, PCRE2_SUBSTITUTE_LITERAL causes the replacement
|
||||
string to be treated as a literal, with no interpretation. If this op-
|
||||
tion is set, PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET,
|
||||
and PCRE2_SUBSTITUTE_UNSET_EMPTY are irrelevant and are ignored.
|
||||
|
||||
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject
|
||||
string, replacing every matching substring. If this option is not set,
|
||||
only the first matching substring is replaced. The search for matches
|
||||
|
@ -3721,7 +3727,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 02 September 2019
|
||||
Last updated: 26 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_SUBSTITUTE 3 "04 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_SUBSTITUTE 3 "26 December 2019" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -66,10 +66,14 @@ zero-terminated strings. The options are:
|
|||
PCRE2_UTF was set at compile time)
|
||||
PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing
|
||||
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||
PCRE2_SUBSTITUTE_LITERAL The replacement string is literal
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string
|
||||
.sp
|
||||
PCRE2_SUBSTITUTE_LITERAL overrides PCRE2_SUBSTITUTE_EXTENDED,
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY.
|
||||
.P
|
||||
The function returns the number of substitutions, which may be zero if there
|
||||
were no matches. The result can be greater than one only when
|
||||
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "02 September 2019" "PCRE2 10.34"
|
||||
.TH PCRE2API 3 "26 December 2019" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -3362,11 +3362,12 @@ space for the trailing zero. Note that in order to compute the required length,
|
|||
of giving an error return as soon as the buffer overflows. Note also that the
|
||||
length is in code units, not bytes.
|
||||
.P
|
||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||
dollar character is an escape character that can specify the insertion of
|
||||
characters from capture groups or names from (*MARK) or other control verbs
|
||||
in the pattern. The following forms are always recognized:
|
||||
The replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set. If the
|
||||
PCRE2_SUBSTITUTE_LITERAL option is set, it is not interpreted in any way. By
|
||||
default, however, a dollar character is an escape character that can specify
|
||||
the insertion of characters from capture groups or names from (*MARK) or other
|
||||
control verbs in the pattern. The following forms are always recognized:
|
||||
.sp
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
|
@ -3392,6 +3393,11 @@ facility can be used to perform simple simultaneous substitutions, as this
|
|||
As well as the usual options for \fBpcre2_match()\fP, a number of additional
|
||||
options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP.
|
||||
.P
|
||||
As mentioned above, PCRE2_SUBSTITUTE_LITERAL causes the replacement string to
|
||||
be treated as a literal, with no interpretation. If this option is set,
|
||||
PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY are irrelevant and are ignored.
|
||||
.P
|
||||
PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject string,
|
||||
replacing every matching substring. If this option is not set, only the first
|
||||
matching substring is replaced. The search for matches takes place in the
|
||||
|
@ -3878,6 +3884,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 02 September 2019
|
||||
Last updated: 26 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "30 July 2019" "PCRE 10.34"
|
||||
.TH PCRE2TEST 1 "26 December 2019" "PCRE2 10.35"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -1024,8 +1024,9 @@ process.
|
|||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1201,8 +1202,9 @@ pattern.
|
|||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1381,6 +1383,7 @@ for \fBpcre2_substitute()\fP:
|
|||
.sp
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -2073,6 +2076,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 July 2019
|
||||
Last updated: 26 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -949,8 +949,9 @@ PATTERN MODIFIERS
|
|||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1103,8 +1104,9 @@ SUBJECT MODIFIERS
|
|||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended        use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_literal use PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty use PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1265,6 +1267,7 @@ SUBJECT MODIFIERS
|
|||
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
substitute_extended PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_literal PCRE2_SUBSTITUTE_LITERAL
|
||||
substitute_overflow_length PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
@ -1902,5 +1905,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 30 July 2019
|
||||
Last updated: 26 December 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
|
|
|
@ -181,6 +181,7 @@ pcre2_jit_match() ignores the latter since it bypasses all sanity checks). */
|
|||
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
|
||||
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
|
||||
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
|
||||
#define PCRE2_SUBSTITUTE_LITERAL 0x00008000u /* pcre2_substitute() only */
|
||||
|
||||
/* Options for pcre2_pattern_convert(). */
|
||||
|
||||
|
|
|
@ -49,8 +49,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_OVERFLOW_LENGTH| \
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
|
@ -227,7 +227,7 @@ uint32_t ovector_count;
|
|||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
BOOL escaped_literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
|
@ -421,12 +421,21 @@ do
|
|||
scb.output_offsets[0] = buff_offset;
|
||||
scb.oveccount = rc;
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
|
||||
/* Process the replacement string. If the entire replacement is literal, just
|
||||
copy it with length check. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
|
||||
{
|
||||
CHECKMEMCPY(ptr, rlength);
|
||||
}
|
||||
|
||||
/* Within a non-literal replacement, which must be scanned character by
|
||||
character, local literal mode can be set by \Q, but only in extended mode
|
||||
when backslashes are being interpreted. In extended mode we must handle
|
||||
nested substrings that are to be reprocessed. */
|
||||
|
||||
else for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
|
@ -443,11 +452,11 @@ do
|
|||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
if (escaped_literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
escaped_literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
|
@ -784,7 +793,7 @@ do
|
|||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
escaped_literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
|
|
|
@ -502,13 +502,14 @@ so many of them that they are split into two fields. */
|
|||
|
||||
#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u
|
||||
#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u
|
||||
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000004u
|
||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000008u
|
||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000010u
|
||||
#define CTL2_SUBJECT_LITERAL 0x00000020u
|
||||
#define CTL2_CALLOUT_NO_WHERE 0x00000040u
|
||||
#define CTL2_CALLOUT_EXTRA 0x00000080u
|
||||
#define CTL2_ALLVECTOR 0x00000100u
|
||||
#define CTL2_SUBSTITUTE_LITERAL 0x00000004u
|
||||
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000008u
|
||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000010u
|
||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000020u
|
||||
#define CTL2_SUBJECT_LITERAL 0x00000040u
|
||||
#define CTL2_CALLOUT_NO_WHERE 0x00000080u
|
||||
#define CTL2_CALLOUT_EXTRA 0x00000100u
|
||||
#define CTL2_ALLVECTOR 0x00000200u
|
||||
|
||||
#define CTL2_NL_SET 0x40000000u /* Informational */
|
||||
#define CTL2_BSR_SET 0x80000000u /* Informational */
|
||||
|
@ -530,6 +531,7 @@ different things in the two cases. */
|
|||
|
||||
#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
|
||||
CTL2_SUBSTITUTE_EXTENDED|\
|
||||
CTL2_SUBSTITUTE_LITERAL|\
|
||||
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
|
||||
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
|
||||
CTL2_SUBSTITUTE_UNSET_EMPTY|\
|
||||
|
@ -718,6 +720,7 @@ static modstruct modlist[] = {
|
|||
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
|
||||
{ "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
|
||||
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
|
||||
{ "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) },
|
||||
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
|
||||
{ "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) },
|
||||
{ "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) },
|
||||
|
@ -4085,7 +4088,7 @@ Returns: nothing
|
|||
static void
|
||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
|
@ -4123,6 +4126,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
|||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "",
|
||||
|
@ -7256,13 +7260,15 @@ if (dat_datctl.replacement[0] != 0)
|
|||
PCRE2_SUBSTITUTE_GLOBAL) |
|
||||
(((dat_datctl.control2 & CTL2_SUBSTITUTE_EXTENDED) == 0)? 0 :
|
||||
PCRE2_SUBSTITUTE_EXTENDED) |
|
||||
(((dat_datctl.control2 & CTL2_SUBSTITUTE_LITERAL) == 0)? 0 :
|
||||
PCRE2_SUBSTITUTE_LITERAL) |
|
||||
(((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) == 0)? 0 :
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) |
|
||||
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) == 0)? 0 :
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET) |
|
||||
(((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 :
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY);
|
||||
|
||||
|
||||
SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */
|
||||
pr = dat_datctl.replacement;
|
||||
|
||||
|
|
|
@ -4583,6 +4583,12 @@ B)x/alt_verbnames,mark
|
|||
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended
|
||||
abcd
|
||||
|
||||
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended,substitute_literal
|
||||
>>abcd<<
|
||||
|
||||
/abcd/g,replace=\$1$2\,substitute_literal
|
||||
XabcdYabcdZ
|
||||
|
||||
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
|
||||
abcDE
|
||||
|
||||
|
@ -4594,6 +4600,7 @@ B)x/alt_verbnames,mark
|
|||
ac
|
||||
ab\=replace=${1:+$1\:$1:$2}
|
||||
ac\=replace=${1:+$1\:$1:$2}
|
||||
>>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal
|
||||
|
||||
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
|
||||
ab
|
||||
|
|
|
@ -14778,6 +14778,14 @@ No match
|
|||
abcd
|
||||
1: w\x0dx\x82y\xdbz(12\$34$$\x345$)
|
||||
|
||||
/abcd/replace=w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$),substitute_extended,substitute_literal
|
||||
>>abcd<<
|
||||
1: >>w\rx\x82y\o{333}z(\Q12\$34$$\x34\E5$$)<<
|
||||
|
||||
/abcd/g,replace=\$1$2\,substitute_literal
|
||||
XabcdYabcdZ
|
||||
2: X\$1$2\Y\$1$2\Z
|
||||
|
||||
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
|
||||
abcDE
|
||||
1: aBcBCbcdEdeabAByzDone
|
||||
|
@ -14795,6 +14803,8 @@ Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement
|
|||
1: b:b
|
||||
ac\=replace=${1:+$1\:$1:$2}
|
||||
1: c
|
||||
>>ac<<\=replace=${1:+$1\:$1:$2},substitute_literal
|
||||
1: >>${1:+$1\:$1:$2}<<
|
||||
|
||||
/a(?:(b)|(c))/substitute_extended,replace=X${1:-1:-1}X${2:-2:-2}
|
||||
ab
|
||||
|
|
Loading…
Reference in New Issue