Implement callouts from pcre2_substitute().
This commit is contained in:
parent
80adf9d165
commit
a69267246f
|
@ -12,6 +12,8 @@ partial matches.
|
||||||
2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has
|
2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has
|
||||||
a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
|
a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
|
||||||
|
|
||||||
|
3. Added support for callouts from pcre2_substitute().
|
||||||
|
|
||||||
|
|
||||||
Version 10.32 10-September-2018
|
Version 10.32 10-September-2018
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
|
@ -85,6 +85,7 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2_set_parens_nest_limit.html \
|
doc/html/pcre2_set_parens_nest_limit.html \
|
||||||
doc/html/pcre2_set_recursion_limit.html \
|
doc/html/pcre2_set_recursion_limit.html \
|
||||||
doc/html/pcre2_set_recursion_memory_management.html \
|
doc/html/pcre2_set_recursion_memory_management.html \
|
||||||
|
doc/html/pcre2_set_substitute_callout.html \
|
||||||
doc/html/pcre2_substitute.html \
|
doc/html/pcre2_substitute.html \
|
||||||
doc/html/pcre2_substring_copy_byname.html \
|
doc/html/pcre2_substring_copy_byname.html \
|
||||||
doc/html/pcre2_substring_copy_bynumber.html \
|
doc/html/pcre2_substring_copy_bynumber.html \
|
||||||
|
@ -178,6 +179,7 @@ dist_man_MANS = \
|
||||||
doc/pcre2_set_parens_nest_limit.3 \
|
doc/pcre2_set_parens_nest_limit.3 \
|
||||||
doc/pcre2_set_recursion_limit.3 \
|
doc/pcre2_set_recursion_limit.3 \
|
||||||
doc/pcre2_set_recursion_memory_management.3 \
|
doc/pcre2_set_recursion_memory_management.3 \
|
||||||
|
doc/pcre2_set_substitute_callout.3 \
|
||||||
doc/pcre2_substitute.3 \
|
doc/pcre2_substitute.3 \
|
||||||
doc/pcre2_substring_copy_byname.3 \
|
doc/pcre2_substring_copy_byname.3 \
|
||||||
doc/pcre2_substring_copy_bynumber.3 \
|
doc/pcre2_substring_copy_bynumber.3 \
|
||||||
|
|
|
@ -162,7 +162,7 @@ listing), and the short pages for individual functions, are concatenated in
|
||||||
pcre2-config show PCRE2 installation configuration information
|
pcre2-config show PCRE2 installation configuration information
|
||||||
pcre2api details of PCRE2's native C API
|
pcre2api details of PCRE2's native C API
|
||||||
pcre2build building PCRE2
|
pcre2build building PCRE2
|
||||||
pcre2callout details of the callout feature
|
pcre2callout details of the pattern callout feature
|
||||||
pcre2compat discussion of Perl compatibility
|
pcre2compat discussion of Perl compatibility
|
||||||
pcre2convert details of pattern conversion functions
|
pcre2convert details of pattern conversion functions
|
||||||
pcre2demo a demonstration C program that uses PCRE2
|
pcre2demo a demonstration C program that uses PCRE2
|
||||||
|
@ -198,7 +198,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 July 2018
|
Last updated: 17 September 2018
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2018 University of Cambridge.
|
Copyright © 1997-2018 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre2_set_substitute_callout specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre2_set_substitute_callout man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
|
please consult the man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include <pcre2.h></b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||||
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function sets the substitute callout fields in a match context (the first
|
||||||
|
argument). The second argument specifies a callout function, and the third
|
||||||
|
argument is an opaque data item that is passed to it. The result of this
|
||||||
|
function is always zero.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
|
@ -182,6 +182,11 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> void *<i>callout_data</i>);</b>
|
<b> void *<i>callout_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||||
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -912,12 +917,23 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
<b> void *<i>callout_data</i>);</b>
|
<b> void *<i>callout_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
This sets up a "callout" function for PCRE2 to call at specified points
|
This sets up a callout function for PCRE2 to call at specified points
|
||||||
during a matching operation. Details are given in the
|
during a matching operation. Details are given in the
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||||
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
This sets up a callout function for PCRE2 to call after each substitution
|
||||||
|
made by <b>pcre2_substitute()</b>. Details are given in the section entitled
|
||||||
|
"Creating a new string with substitutions"
|
||||||
|
<a href="#substitutions">below.</a>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -3163,26 +3179,30 @@ page, you cannot use names to distinguish the different subpatterns, because
|
||||||
names are not included in the compiled code. The matching process uses only
|
names are not included in the compiled code. The matching process uses only
|
||||||
numbers. For this reason, the use of different names for subpatterns of the
|
numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
</P>
|
<a name="substitutions"></a></P>
|
||||||
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *\fIoutputbuffer\zfP,</b>
|
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||||
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||||
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
string in <i>outputbuffer</i>, replacing one or more parts that were matched
|
||||||
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
with the <i>replacement</i> string, whose length is supplied in <b>rlength</b>.
|
||||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
which a \K item in a lookahead in the pattern causes the match to end before
|
The default is to perform just one replacement, but there is an option that
|
||||||
it starts are not supported, and give rise to an error return. For global
|
requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
|
||||||
replacements, matches in which \K in a lookbehind causes the match to start
|
</P>
|
||||||
earlier than the point that was reached in the previous iteration are also not
|
<P>
|
||||||
supported.
|
Matches in which a \K item in a lookahead in the pattern causes the match to
|
||||||
|
end before it starts are not supported, and give rise to an error return. For
|
||||||
|
global replacements, matches in which \K in a lookbehind causes the match to
|
||||||
|
start earlier than the point that was reached in the previous iteration are
|
||||||
|
also not supported.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||||
|
@ -3194,9 +3214,9 @@ allocate memory for the compiled code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If an external <i>match_data</i> block is provided, its contents afterwards
|
If an external <i>match_data</i> block is provided, its contents afterwards
|
||||||
are those set by the final call to <b>pcre2_match()</b>, which will have
|
are those set by the final call to <b>pcre2_match()</b>. For global changes,
|
||||||
ended in a matching error. The contents of the ovector within the match data
|
this will have ended in a matching error. The contents of the ovector within
|
||||||
block may or may not have been changed.
|
the match data block may or may not have been changed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>outlengthptr</i> argument must point to a variable that contains the
|
The <i>outlengthptr</i> argument must point to a variable that contains the
|
||||||
|
@ -3220,12 +3240,12 @@ length is in code units, not bytes.
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
dollar character is an escape character that can specify the insertion of
|
dollar character is an escape character that can specify the insertion of
|
||||||
characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
|
characters from capturing groups or names from (*MARK) or other control verbs
|
||||||
pattern. The following forms are always recognized:
|
in the pattern. The following forms are always recognized:
|
||||||
<pre>
|
<pre>
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
$*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
|
$*MARK or ${*MARK} insert a control verb name
|
||||||
</pre>
|
</pre>
|
||||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
required only if the following character would be interpreted as part of the
|
required only if the following character would be interpreted as part of the
|
||||||
|
@ -3234,12 +3254,13 @@ For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||||
string "+$1$0$1+", the result is "=+babcb+=".
|
string "+$1$0$1+", the result is "=+babcb+=".
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
|
$*MARK inserts the name from the last encountered (*ACCEPT), (*COMMIT),
|
||||||
on the matching path that has a name. (*MARK) must always include a name, but
|
(*MARK), (*PRUNE), or (*THEN) on the matching path that has a name. (*MARK)
|
||||||
(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
|
must always include a name, but the other verbs need not. For example, in
|
||||||
the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
|
the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
|
||||||
This facility can be used to perform simple simultaneous substitutions, as this
|
(*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be used to
|
||||||
<b>pcre2test</b> example shows:
|
perform simple simultaneous substitutions, as this <b>pcre2test</b> example
|
||||||
|
shows:
|
||||||
<pre>
|
<pre>
|
||||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||||
apple lemon
|
apple lemon
|
||||||
|
@ -3399,6 +3420,44 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
|
||||||
"Obtaining a textual error message"
|
"Obtaining a textual error message"
|
||||||
<a href="#geterrormessage">above).</a>
|
<a href="#geterrormessage">above).</a>
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Substitution callouts
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||||
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
The <b>pcre2_set_substitute_callout()</b> function can be used to specify a
|
||||||
|
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
||||||
|
a match context. The callout function is called after each substitution. It is
|
||||||
|
not called for simulated substitutions that happen as a result of the
|
||||||
|
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. A callout function should not return
|
||||||
|
any value.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The first argument of the callout function is a pointer to a substitute callout
|
||||||
|
block structure, which contains the following fields, not necessarily in this
|
||||||
|
order:
|
||||||
|
<pre>
|
||||||
|
uint32_t <i>version</i>;
|
||||||
|
PCRE2_SIZE <i>input_offsets[2]</i>;
|
||||||
|
PCRE2_SIZE <i>output_offsets[2]</i>;
|
||||||
|
</pre>
|
||||||
|
The <i>version</i> field contains the version number of the block format. The
|
||||||
|
current version is 0. The version number will increase in future if more fields
|
||||||
|
are added, but the intention is never to remove any of the existing fields.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>input_offsets</i> vector contains the code unit offsets in the input
|
||||||
|
string of the matched substring, and the <i>output_offsets</i> vector contains
|
||||||
|
the offsets of the replacement in the output string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The second argument of the callout function is the value passed as
|
||||||
|
<i>callout_data</i> when the function was registered.
|
||||||
|
</P>
|
||||||
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||||
|
@ -3665,7 +3724,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 07 September 2018
|
Last updated: 18 September 2018
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2018 University of Cambridge.
|
Copyright © 1997-2018 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -44,6 +44,14 @@ a match context (see <b>pcre2_set_callout()</b> in the
|
||||||
documentation).
|
documentation).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
When using the <b>pcre2_substitute()</b> function, an additional callout feature
|
||||||
|
is available. This does a callout after each change to the subject string and
|
||||||
|
is described in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
documentation; the rest of this document is concerned with callouts during
|
||||||
|
pattern matching.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||||
function is to be called. Different callout points can be identified by putting
|
function is to be called. Different callout points can be identified by putting
|
||||||
a number less than 256 after the letter C. The default value is zero.
|
a number less than 256 after the letter C. The default value is zero.
|
||||||
|
@ -463,7 +471,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 26 April 2018
|
Last updated: 17 September 2018
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2018 University of Cambridge.
|
Copyright © 1997-2018 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1041,6 +1041,7 @@ process.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1048,6 +1049,7 @@ process.
|
||||||
mark show mark values
|
mark show mark values
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1185,6 +1187,7 @@ pattern.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text (non-JIT only)
|
allusedtext show all consulted text (non-JIT only)
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
|
@ -1214,6 +1217,7 @@ pattern.
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
startoffset=<n> same as offset=<n>
|
startoffset=<n> same as offset=<n>
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1281,10 +1285,28 @@ captured parentheses be output after a match. By default, only those up to the
|
||||||
highest one actually used in the match are output (corresponding to the return
|
highest one actually used in the match are output (corresponding to the return
|
||||||
code from <b>pcre2_match()</b>). Groups that did not take part in the match
|
code from <b>pcre2_match()</b>). Groups that did not take part in the match
|
||||||
are output as "<unset>". This modifier is not relevant for DFA matching (which
|
are output as "<unset>". This modifier is not relevant for DFA matching (which
|
||||||
does no capturing); it is ignored, with a warning message, if present.
|
does no capturing) and does not apply when <b>replace</b> is specified; it is
|
||||||
|
ignored, with a warning message, if present.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Testing callouts
|
Showing the entire ovector, for all outcomes
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The <b>allvector</b> modifier requests that the entire ovector be shown,
|
||||||
|
whatever the outcome of the match. Compare <b>allcaptures</b>, which shows only
|
||||||
|
up to the maximum number of capture groups for the pattern, and then only for a
|
||||||
|
successful complete non-DFA match. This modifier, which acts after any match
|
||||||
|
result, and also for DFA matching, provides a means of checking that there are
|
||||||
|
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||||
|
ovector is filled with a special value, and if this is found in both elements
|
||||||
|
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||||
|
applies to all groups after the maximum capture group for the pattern. In other
|
||||||
|
cases it applies to the entire ovector. After a partial match, the first two
|
||||||
|
elements are the only ones that should be set. After a DFA match, the amount of
|
||||||
|
ovector that is used depends on the number of matches that were found.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Testing pattern callouts
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
||||||
|
@ -1292,6 +1314,9 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
|
||||||
controlled by various modifiers listed above whose names begin with
|
controlled by various modifiers listed above whose names begin with
|
||||||
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
||||||
<a href="#callouts">below.</a>
|
<a href="#callouts">below.</a>
|
||||||
|
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
||||||
|
"Testing the substitution function"
|
||||||
|
<a href="#substitution">below.</a>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
@ -1343,7 +1368,7 @@ instead of a colon. This is in addition to the normal full list. The string
|
||||||
length (that is, the return from the extraction function) is given in
|
length (that is, the return from the extraction function) is given in
|
||||||
parentheses after each substring, followed by the name when the extraction was
|
parentheses after each substring, followed by the name when the extraction was
|
||||||
by name.
|
by name.
|
||||||
</P>
|
<a name="substitution"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Testing the substitution function
|
Testing the substitution function
|
||||||
</b><br>
|
</b><br>
|
||||||
|
@ -1384,6 +1409,16 @@ simple example of a substitution test:
|
||||||
=abc=abc=\=global
|
=abc=abc=\=global
|
||||||
2: =xxx=xxx=
|
2: =xxx=xxx=
|
||||||
</pre>
|
</pre>
|
||||||
|
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
||||||
|
function is set up. When it is called (after each substitution), the offsets in
|
||||||
|
the input and output strings are output. For example:
|
||||||
|
<pre>
|
||||||
|
/abc/g,replace=<$0>,substitute_callout
|
||||||
|
abcdefabcpqr
|
||||||
|
Old 0 3 New 0 5
|
||||||
|
Old 6 9 New 8 13
|
||||||
|
2: <abc>def<abc>pqr
|
||||||
|
</pre>
|
||||||
Subject and replacement strings should be kept relatively short (fewer than 256
|
Subject and replacement strings should be kept relatively short (fewer than 256
|
||||||
characters) for substitution tests, as fixed-size buffers are used. To make it
|
characters) for substitution tests, as fixed-size buffers are used. To make it
|
||||||
easy to test for buffer overflow, if the replacement string starts with a
|
easy to test for buffer overflow, if the replacement string starts with a
|
||||||
|
@ -1401,10 +1436,10 @@ The default action of <b>pcre2_substitute()</b> is to return
|
||||||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
||||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
||||||
<b>substitute_overflow_length</b> modifier), <b>pcre2_substitute()</b> continues
|
<b>substitute_overflow_length</b> modifier), <b>pcre2_substitute()</b> continues
|
||||||
to go through the motions of matching and substituting, in order to compute the
|
to go through the motions of matching and substituting (but not doing any
|
||||||
size of buffer that is required. When this happens, <b>pcre2test</b> shows the
|
callouts), in order to compute the size of buffer that is required. When this
|
||||||
required buffer length (which includes space for the trailing zero) as part of
|
happens, <b>pcre2test</b> shows the required buffer length (which includes space
|
||||||
the error message. For example:
|
for the trailing zero) as part of the error message. For example:
|
||||||
<pre>
|
<pre>
|
||||||
/abc/substitute_overflow_length
|
/abc/substitute_overflow_length
|
||||||
123abc123\=replace=[9]XYZ
|
123abc123\=replace=[9]XYZ
|
||||||
|
@ -2004,7 +2039,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 21 July 2018
|
Last updated: 17 September 2018
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2018 University of Cambridge.
|
Copyright © 1997-2018 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2 3 "11 July 2018" "PCRE2 10.32"
|
.TH PCRE2 3 "17 September 2018" "PCRE2 10.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH INTRODUCTION
|
.SH INTRODUCTION
|
||||||
|
@ -156,7 +156,7 @@ listing), and the short pages for individual functions, are concatenated in
|
||||||
pcre2-config show PCRE2 installation configuration information
|
pcre2-config show PCRE2 installation configuration information
|
||||||
pcre2api details of PCRE2's native C API
|
pcre2api details of PCRE2's native C API
|
||||||
pcre2build building PCRE2
|
pcre2build building PCRE2
|
||||||
pcre2callout details of the callout feature
|
pcre2callout details of the pattern callout feature
|
||||||
pcre2compat discussion of Perl compatibility
|
pcre2compat discussion of Perl compatibility
|
||||||
pcre2convert details of pattern conversion functions
|
pcre2convert details of pattern conversion functions
|
||||||
pcre2demo a demonstration C program that uses PCRE2
|
pcre2demo a demonstration C program that uses PCRE2
|
||||||
|
@ -197,6 +197,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 11 July 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
608
doc/pcre2.txt
608
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
||||||
|
.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "17 September 2018" "PCRE2 10.33"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||||
|
.B " void *\fIcallout_data\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function sets the substitute callout fields in a match context (the first
|
||||||
|
argument). The second argument specifies a callout function, and the third
|
||||||
|
argument is an opaque data item that is passed to it. The result of this
|
||||||
|
function is always zero.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
107
doc/pcre2api.3
107
doc/pcre2api.3
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "07 September 2018" "PCRE2 10.32"
|
.TH PCRE2API 3 "18 September 2018" "PCRE2 10.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -123,6 +123,10 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *),"
|
.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *),"
|
||||||
.B " void *\fIcallout_data\fP);"
|
.B " void *\fIcallout_data\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||||
|
.B " void *\fIcallout_data\fP);"
|
||||||
|
.sp
|
||||||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
@ -847,7 +851,7 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
.B " void *\fIcallout_data\fP);"
|
.B " void *\fIcallout_data\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
This sets up a "callout" function for PCRE2 to call at specified points
|
This sets up a callout function for PCRE2 to call at specified points
|
||||||
during a matching operation. Details are given in the
|
during a matching operation. Details are given in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2callout\fP
|
\fBpcre2callout\fP
|
||||||
|
@ -855,6 +859,20 @@ during a matching operation. Details are given in the
|
||||||
documentation.
|
documentation.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||||
|
.B " void *\fIcallout_data\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
This sets up a callout function for PCRE2 to call after each substitution
|
||||||
|
made by \fBpcre2_substitute()\fP. Details are given in the section entitled
|
||||||
|
"Creating a new string with substitutions"
|
||||||
|
.\" HTML <a href="#substitutions">
|
||||||
|
.\" </a>
|
||||||
|
below.
|
||||||
|
.\"
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||||
.fi
|
.fi
|
||||||
|
@ -3171,6 +3189,7 @@ numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="substitutions"></a>
|
||||||
.SH "CREATING A NEW STRING WITH SUBSTITUTIONS"
|
.SH "CREATING A NEW STRING WITH SUBSTITUTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -3179,19 +3198,22 @@ same number causes an error at compile time.
|
||||||
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||||
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||||
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||||
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\zfP,"
|
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||||
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||||
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
string in \fIoutputbuffer\fP, replacing one or more parts that were matched
|
||||||
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP.
|
||||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
which a \eK item in a lookahead in the pattern causes the match to end before
|
The default is to perform just one replacement, but there is an option that
|
||||||
it starts are not supported, and give rise to an error return. For global
|
requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
|
||||||
replacements, matches in which \eK in a lookbehind causes the match to start
|
.P
|
||||||
earlier than the point that was reached in the previous iteration are also not
|
Matches in which a \eK item in a lookahead in the pattern causes the match to
|
||||||
supported.
|
end before it starts are not supported, and give rise to an error return. For
|
||||||
|
global replacements, matches in which \eK in a lookbehind causes the match to
|
||||||
|
start earlier than the point that was reached in the previous iteration are
|
||||||
|
also not supported.
|
||||||
.P
|
.P
|
||||||
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||||
\fBpcre2_match()\fP, except that the partial matching options are not
|
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||||
|
@ -3201,9 +3223,9 @@ functions from the match context, if provided, or else those that were used to
|
||||||
allocate memory for the compiled code.
|
allocate memory for the compiled code.
|
||||||
.P
|
.P
|
||||||
If an external \fImatch_data\fP block is provided, its contents afterwards
|
If an external \fImatch_data\fP block is provided, its contents afterwards
|
||||||
are those set by the final call to \fBpcre2_match()\fP, which will have
|
are those set by the final call to \fBpcre2_match()\fP. For global changes,
|
||||||
ended in a matching error. The contents of the ovector within the match data
|
this will have ended in a matching error. The contents of the ovector within
|
||||||
block may or may not have been changed.
|
the match data block may or may not have been changed.
|
||||||
.P
|
.P
|
||||||
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
||||||
length, in code units, of the output buffer. If the function is successful, the
|
length, in code units, of the output buffer. If the function is successful, the
|
||||||
|
@ -3224,12 +3246,12 @@ length is in code units, not bytes.
|
||||||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
dollar character is an escape character that can specify the insertion of
|
dollar character is an escape character that can specify the insertion of
|
||||||
characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
|
characters from capturing groups or names from (*MARK) or other control verbs
|
||||||
pattern. The following forms are always recognized:
|
in the pattern. The following forms are always recognized:
|
||||||
.sp
|
.sp
|
||||||
$$ insert a dollar character
|
$$ insert a dollar character
|
||||||
$<n> or ${<n>} insert the contents of group <n>
|
$<n> or ${<n>} insert the contents of group <n>
|
||||||
$*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
|
$*MARK or ${*MARK} insert a control verb name
|
||||||
.sp
|
.sp
|
||||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
required only if the following character would be interpreted as part of the
|
required only if the following character would be interpreted as part of the
|
||||||
|
@ -3237,12 +3259,13 @@ number or name. The number may be zero to include the entire matched string.
|
||||||
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||||
string "+$1$0$1+", the result is "=+babcb+=".
|
string "+$1$0$1+", the result is "=+babcb+=".
|
||||||
.P
|
.P
|
||||||
$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
|
$*MARK inserts the name from the last encountered (*ACCEPT), (*COMMIT),
|
||||||
on the matching path that has a name. (*MARK) must always include a name, but
|
(*MARK), (*PRUNE), or (*THEN) on the matching path that has a name. (*MARK)
|
||||||
(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
|
must always include a name, but the other verbs need not. For example, in
|
||||||
the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
|
the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
|
||||||
This facility can be used to perform simple simultaneous substitutions, as this
|
(*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be used to
|
||||||
\fBpcre2test\fP example shows:
|
perform simple simultaneous substitutions, as this \fBpcre2test\fP example
|
||||||
|
shows:
|
||||||
.sp
|
.sp
|
||||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||||
apple lemon
|
apple lemon
|
||||||
|
@ -3388,6 +3411,42 @@ above).
|
||||||
.\"
|
.\"
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SS "Substitution callouts"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||||
|
.B " void *\fIcallout_data\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
The \fBpcre2_set_substitute_callout()\fP function can be used to specify a
|
||||||
|
callout function for \fBpcre2_substitute()\fP. This information is passed in
|
||||||
|
a match context. The callout function is called after each substitution. It is
|
||||||
|
not called for simulated substitutions that happen as a result of the
|
||||||
|
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. A callout function should not return
|
||||||
|
any value.
|
||||||
|
.P
|
||||||
|
The first argument of the callout function is a pointer to a substitute callout
|
||||||
|
block structure, which contains the following fields, not necessarily in this
|
||||||
|
order:
|
||||||
|
.sp
|
||||||
|
uint32_t \fIversion\fP;
|
||||||
|
PCRE2_SIZE \fIinput_offsets[2]\fP;
|
||||||
|
PCRE2_SIZE \fIoutput_offsets[2]\fP;
|
||||||
|
.sp
|
||||||
|
The \fIversion\fP field contains the version number of the block format. The
|
||||||
|
current version is 0. The version number will increase in future if more fields
|
||||||
|
are added, but the intention is never to remove any of the existing fields.
|
||||||
|
.P
|
||||||
|
The \fIinput_offsets\fP vector contains the code unit offsets in the input
|
||||||
|
string of the matched substring, and the \fIoutput_offsets\fP vector contains
|
||||||
|
the offsets of the replacement in the output string.
|
||||||
|
.P
|
||||||
|
The second argument of the callout function is the value passed as
|
||||||
|
\fIcallout_data\fP when the function was registered.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -3670,6 +3729,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 07 September 2018
|
Last updated: 18 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2CALLOUT 3 "26 April 2018" "PCRE2 10.32"
|
.TH PCRE2CALLOUT 3 "17 September 2018" "PCRE2 10.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -27,6 +27,15 @@ a match context (see \fBpcre2_set_callout()\fP in the
|
||||||
.\"
|
.\"
|
||||||
documentation).
|
documentation).
|
||||||
.P
|
.P
|
||||||
|
When using the \fBpcre2_substitute()\fP function, an additional callout feature
|
||||||
|
is available. This does a callout after each change to the subject string and
|
||||||
|
is described in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
documentation; the rest of this document is concerned with callouts during
|
||||||
|
pattern matching.
|
||||||
|
.P
|
||||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||||
function is to be called. Different callout points can be identified by putting
|
function is to be called. Different callout points can be identified by putting
|
||||||
a number less than 256 after the letter C. The default value is zero.
|
a number less than 256 after the letter C. The default value is zero.
|
||||||
|
@ -443,6 +452,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 26 April 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "15 September 2018" "PCRE 10.33"
|
.TH PCRE2TEST 1 "17 September 2018" "PCRE 10.33"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -1011,6 +1011,7 @@ process.
|
||||||
mark show mark values
|
mark show mark values
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1185,6 +1186,7 @@ pattern.
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
startoffset=<n> same as offset=<n>
|
startoffset=<n> same as offset=<n>
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended          use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended          use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1271,7 +1273,7 @@ elements are the only ones that should be set. After a DFA match, the amount of
|
||||||
ovector that is used depends on the number of matches that were found.
|
ovector that is used depends on the number of matches that were found.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Testing callouts"
|
.SS "Testing pattern callouts"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
A callout function is supplied when \fBpcre2test\fP calls the library matching
|
A callout function is supplied when \fBpcre2test\fP calls the library matching
|
||||||
|
@ -1280,7 +1282,13 @@ controlled by various modifiers listed above whose names begin with
|
||||||
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
||||||
.\" HTML <a href="#callouts">
|
.\" HTML <a href="#callouts">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
below.
|
below.
|
||||||
|
.\"
|
||||||
|
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
||||||
|
"Testing the substitution function"
|
||||||
|
.\" HTML <a href="#substitution">
|
||||||
|
.\" </a>
|
||||||
|
below.
|
||||||
.\"
|
.\"
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -1332,6 +1340,7 @@ parentheses after each substring, followed by the name when the extraction was
|
||||||
by name.
|
by name.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="substitution"></a>
|
||||||
.SS "Testing the substitution function"
|
.SS "Testing the substitution function"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -1367,6 +1376,16 @@ simple example of a substitution test:
|
||||||
=abc=abc=\e=global
|
=abc=abc=\e=global
|
||||||
2: =xxx=xxx=
|
2: =xxx=xxx=
|
||||||
.sp
|
.sp
|
||||||
|
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
||||||
|
function is set up. When it is called (after each substitution), the offsets in
|
||||||
|
the input and output strings are output. For example:
|
||||||
|
.sp
|
||||||
|
/abc/g,replace=<$0>,substitute_callout
|
||||||
|
abcdefabcpqr
|
||||||
|
Old 0 3 New 0 5
|
||||||
|
Old 6 9 New 8 13
|
||||||
|
2: <abc>def<abc>pqr
|
||||||
|
.sp
|
||||||
Subject and replacement strings should be kept relatively short (fewer than 256
|
Subject and replacement strings should be kept relatively short (fewer than 256
|
||||||
characters) for substitution tests, as fixed-size buffers are used. To make it
|
characters) for substitution tests, as fixed-size buffers are used. To make it
|
||||||
easy to test for buffer overflow, if the replacement string starts with a
|
easy to test for buffer overflow, if the replacement string starts with a
|
||||||
|
@ -1384,10 +1403,10 @@ The default action of \fBpcre2_substitute()\fP is to return
|
||||||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
||||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
||||||
\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues
|
\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues
|
||||||
to go through the motions of matching and substituting, in order to compute the
|
to go through the motions of matching and substituting (but not doing any
|
||||||
size of buffer that is required. When this happens, \fBpcre2test\fP shows the
|
callouts), in order to compute the size of buffer that is required. When this
|
||||||
required buffer length (which includes space for the trailing zero) as part of
|
happens, \fBpcre2test\fP shows the required buffer length (which includes space
|
||||||
the error message. For example:
|
for the trailing zero) as part of the error message. For example:
|
||||||
.sp
|
.sp
|
||||||
/abc/substitute_overflow_length
|
/abc/substitute_overflow_length
|
||||||
123abc123\e=replace=[9]XYZ
|
123abc123\e=replace=[9]XYZ
|
||||||
|
@ -2002,6 +2021,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 15 September 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -929,6 +929,7 @@ PATTERN MODIFIERS
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -936,6 +937,7 @@ PATTERN MODIFIERS
|
||||||
mark show mark values
|
mark show mark values
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1057,6 +1059,7 @@ SUBJECT MODIFIERS
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text (non-JIT only)
|
allusedtext show all consulted text (non-JIT only)
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
|
@ -1086,6 +1089,7 @@ SUBJECT MODIFIERS
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
startoffset=<n> same as offset=<n>
|
startoffset=<n> same as offset=<n>
|
||||||
|
substitute_callout use substitution callouts
|
||||||
substitute_extended          use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended          use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1150,76 +1154,95 @@ SUBJECT MODIFIERS
|
||||||
the highest one actually used in the match are output (corresponding to
|
the highest one actually used in the match are output (corresponding to
|
||||||
the return code from pcre2_match()). Groups that did not take part in
|
the return code from pcre2_match()). Groups that did not take part in
|
||||||
the match are output as "<unset>". This modifier is not relevant for
|
the match are output as "<unset>". This modifier is not relevant for
|
||||||
DFA matching (which does no capturing); it is ignored, with a warning
|
DFA matching (which does no capturing) and does not apply when replace
|
||||||
message, if present.
|
is specified; it is ignored, with a warning message, if present.
|
||||||
|
|
||||||
Testing callouts
|
Showing the entire ovector, for all outcomes
|
||||||
|
|
||||||
A callout function is supplied when pcre2test calls the library match-
|
The allvector modifier requests that the entire ovector be shown, what-
|
||||||
ing functions, unless callout_none is specified. Its behaviour can be
|
ever the outcome of the match. Compare allcaptures, which shows only up
|
||||||
controlled by various modifiers listed above whose names begin with
|
to the maximum number of capture groups for the pattern, and then only
|
||||||
callout_. Details are given in the section entitled "Callouts" below.
|
for a successful complete non-DFA match. This modifier, which acts
|
||||||
|
after any match result, and also for DFA matching, provides a means of
|
||||||
|
checking that there are no unexpected modifications to ovector fields.
|
||||||
|
Before each match attempt, the ovector is filled with a special value,
|
||||||
|
and if this is found in both elements of a capturing pair,
|
||||||
|
"<unchanged>" is output. After a successful match, this applies to all
|
||||||
|
groups after the maximum capture group for the pattern. In other cases
|
||||||
|
it applies to the entire ovector. After a partial match, the first two
|
||||||
|
elements are the only ones that should be set. After a DFA match, the
|
||||||
|
amount of ovector that is used depends on the number of matches that
|
||||||
|
were found.
|
||||||
|
|
||||||
|
Testing pattern callouts
|
||||||
|
|
||||||
|
A callout function is supplied when pcre2test calls the library match-
|
||||||
|
ing functions, unless callout_none is specified. Its behaviour can be
|
||||||
|
controlled by various modifiers listed above whose names begin with
|
||||||
|
callout_. Details are given in the section entitled "Callouts" below.
|
||||||
|
Testing callouts from pcre2_substitute() is described separately in
|
||||||
|
"Testing the substitution function" below.
|
||||||
|
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
Searching for all possible matches within a subject can be requested by
|
Searching for all possible matches within a subject can be requested by
|
||||||
the global or altglobal modifier. After finding a match, the matching
|
the global or altglobal modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The
|
function is called again to search the remainder of the subject. The
|
||||||
difference between global and altglobal is that the former uses the
|
difference between global and altglobal is that the former uses the
|
||||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||||
searching at a new point within the entire string (which is what Perl
|
searching at a new point within the entire string (which is what Perl
|
||||||
does), whereas the latter passes over a shortened subject. This makes a
|
does), whereas the latter passes over a shortened subject. This makes a
|
||||||
difference to the matching process if the pattern begins with a lookbe-
|
difference to the matching process if the pattern begins with a lookbe-
|
||||||
hind assertion (including \b or \B).
|
hind assertion (including \b or \B).
|
||||||
|
|
||||||
If an empty string is matched, the next match is done with the
|
If an empty string is matched, the next match is done with the
|
||||||
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
||||||
for another, non-empty, match at the same point in the subject. If this
|
for another, non-empty, match at the same point in the subject. If this
|
||||||
match fails, the start offset is advanced, and the normal match is
|
match fails, the start offset is advanced, and the normal match is
|
||||||
retried. This imitates the way Perl handles such cases when using the
|
retried. This imitates the way Perl handles such cases when using the
|
||||||
/g modifier or the split() function. Normally, the start offset is
|
/g modifier or the split() function. Normally, the start offset is
|
||||||
advanced by one character, but if the newline convention recognizes
|
advanced by one character, but if the newline convention recognizes
|
||||||
CRLF as a newline, and the current character is CR followed by LF, an
|
CRLF as a newline, and the current character is CR followed by LF, an
|
||||||
advance of two characters occurs.
|
advance of two characters occurs.
|
||||||
|
|
||||||
Testing substring extraction functions
|
Testing substring extraction functions
|
||||||
|
|
||||||
The copy and get modifiers can be used to test the pcre2_sub-
|
The copy and get modifiers can be used to test the pcre2_sub-
|
||||||
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
|
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
|
||||||
given more than once, and each can specify a group name or number, for
|
given more than once, and each can specify a group name or number, for
|
||||||
example:
|
example:
|
||||||
|
|
||||||
abcd\=copy=1,copy=3,get=G1
|
abcd\=copy=1,copy=3,get=G1
|
||||||
|
|
||||||
If the #subject command is used to set default copy and/or get lists,
|
If the #subject command is used to set default copy and/or get lists,
|
||||||
these can be unset by specifying a negative number to cancel all num-
|
these can be unset by specifying a negative number to cancel all num-
|
||||||
bered groups and an empty name to cancel all named groups.
|
bered groups and an empty name to cancel all named groups.
|
||||||
|
|
||||||
The getall modifier tests pcre2_substring_list_get(), which extracts
|
The getall modifier tests pcre2_substring_list_get(), which extracts
|
||||||
all captured substrings.
|
all captured substrings.
|
||||||
|
|
||||||
If the subject line is successfully matched, the substrings extracted
|
If the subject line is successfully matched, the substrings extracted
|
||||||
by the convenience functions are output with C, G, or L after the
|
by the convenience functions are output with C, G, or L after the
|
||||||
string number instead of a colon. This is in addition to the normal
|
string number instead of a colon. This is in addition to the normal
|
||||||
full list. The string length (that is, the return from the extraction
|
full list. The string length (that is, the return from the extraction
|
||||||
function) is given in parentheses after each substring, followed by the
|
function) is given in parentheses after each substring, followed by the
|
||||||
name when the extraction was by name.
|
name when the extraction was by name.
|
||||||
|
|
||||||
Testing the substitution function
|
Testing the substitution function
|
||||||
|
|
||||||
If the replace modifier is set, the pcre2_substitute() function is
|
If the replace modifier is set, the pcre2_substitute() function is
|
||||||
called instead of one of the matching functions. Note that replacement
|
called instead of one of the matching functions. Note that replacement
|
||||||
strings cannot contain commas, because a comma signifies the end of a
|
strings cannot contain commas, because a comma signifies the end of a
|
||||||
modifier. This is not thought to be an issue in a test program.
|
modifier. This is not thought to be an issue in a test program.
|
||||||
|
|
||||||
Unlike subject strings, pcre2test does not process replacement strings
|
Unlike subject strings, pcre2test does not process replacement strings
|
||||||
for escape sequences. In UTF mode, a replacement string is checked to
|
for escape sequences. In UTF mode, a replacement string is checked to
|
||||||
see if it is a valid UTF-8 string. If so, it is correctly converted to
|
see if it is a valid UTF-8 string. If so, it is correctly converted to
|
||||||
a UTF string of the appropriate code unit width. If it is not a valid
|
a UTF string of the appropriate code unit width. If it is not a valid
|
||||||
UTF-8 string, the individual code units are copied directly. This pro-
|
UTF-8 string, the individual code units are copied directly. This pro-
|
||||||
vides a means of passing an invalid UTF-8 string for testing purposes.
|
vides a means of passing an invalid UTF-8 string for testing purposes.
|
||||||
|
|
||||||
The following modifiers set options (in addition to the normal match
|
The following modifiers set options (in addition to the normal match
|
||||||
options) for pcre2_substitute():
|
options) for pcre2_substitute():
|
||||||
|
|
||||||
global PCRE2_SUBSTITUTE_GLOBAL
|
global PCRE2_SUBSTITUTE_GLOBAL
|
||||||
|
@ -1229,8 +1252,8 @@ SUBJECT MODIFIERS
|
||||||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||||
|
|
||||||
|
|
||||||
After a successful substitution, the modified string is output, pre-
|
After a successful substitution, the modified string is output, pre-
|
||||||
ceded by the number of replacements. This may be zero if there were no
|
ceded by the number of replacements. This may be zero if there were no
|
||||||
matches. Here is a simple example of a substitution test:
|
matches. Here is a simple example of a substitution test:
|
||||||
|
|
||||||
/abc/replace=xxx
|
/abc/replace=xxx
|
||||||
|
@ -1239,12 +1262,22 @@ SUBJECT MODIFIERS
|
||||||
=abc=abc=\=global
|
=abc=abc=\=global
|
||||||
2: =xxx=xxx=
|
2: =xxx=xxx=
|
||||||
|
|
||||||
Subject and replacement strings should be kept relatively short (fewer
|
If the substitute_callout modifier is set, a substitution callout func-
|
||||||
than 256 characters) for substitution tests, as fixed-size buffers are
|
tion is set up. When it is called (after each substitution), the off-
|
||||||
used. To make it easy to test for buffer overflow, if the replacement
|
sets in the input and output strings are output. For example:
|
||||||
string starts with a number in square brackets, that number is passed
|
|
||||||
to pcre2_substitute() as the size of the output buffer, with the
|
/abc/g,replace=<$0>,substitute_callout
|
||||||
replacement string starting at the next character. Here is an example
|
abcdefabcpqr
|
||||||
|
Old 0 3 New 0 5
|
||||||
|
Old 6 9 New 8 13
|
||||||
|
2: <abc>def<abc>pqr
|
||||||
|
|
||||||
|
Subject and replacement strings should be kept relatively short (fewer
|
||||||
|
than 256 characters) for substitution tests, as fixed-size buffers are
|
||||||
|
used. To make it easy to test for buffer overflow, if the replacement
|
||||||
|
string starts with a number in square brackets, that number is passed
|
||||||
|
to pcre2_substitute() as the size of the output buffer, with the
|
||||||
|
replacement string starting at the next character. Here is an example
|
||||||
that tests the edge case:
|
that tests the edge case:
|
||||||
|
|
||||||
/abc/
|
/abc/
|
||||||
|
@ -1253,14 +1286,15 @@ SUBJECT MODIFIERS
|
||||||
123abc123\=replace=[9]XYZ
|
123abc123\=replace=[9]XYZ
|
||||||
Failed: error -47: no more memory
|
Failed: error -47: no more memory
|
||||||
|
|
||||||
The default action of pcre2_substitute() is to return
|
The default action of pcre2_substitute() is to return
|
||||||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
|
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
|
||||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
|
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
|
||||||
stitute_overflow_length modifier), pcre2_substitute() continues to go
|
stitute_overflow_length modifier), pcre2_substitute() continues to go
|
||||||
through the motions of matching and substituting, in order to compute
|
through the motions of matching and substituting (but not doing any
|
||||||
the size of buffer that is required. When this happens, pcre2test shows
|
callouts), in order to compute the size of buffer that is required.
|
||||||
the required buffer length (which includes space for the trailing zero)
|
When this happens, pcre2test shows the required buffer length (which
|
||||||
as part of the error message. For example:
|
includes space for the trailing zero) as part of the error message. For
|
||||||
|
example:
|
||||||
|
|
||||||
/abc/substitute_overflow_length
|
/abc/substitute_overflow_length
|
||||||
123abc123\=replace=[9]XYZ
|
123abc123\=replace=[9]XYZ
|
||||||
|
@ -1818,5 +1852,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 21 July 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
|
|
|
@ -505,10 +505,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
||||||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||||
|
|
||||||
|
|
||||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
/* The structures for passing out data via callout functions. We use structures
|
||||||
structure so that new fields can be added on the end in future versions,
|
so that new fields can be added on the end in future versions, without changing
|
||||||
without changing the API of the function, thereby allowing old clients to work
|
the API of the function, thereby allowing old clients to work without
|
||||||
without modification. Define the generic version in a macro; the width-specific
|
modification. Define the generic versions in a macro; the width-specific
|
||||||
versions are generated from this macro below. */
|
versions are generated from this macro below. */
|
||||||
|
|
||||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||||
|
@ -550,7 +550,15 @@ typedef struct pcre2_callout_enumerate_block { \
|
||||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
/* ------------------------------------------------------------------ */ \
|
/* ------------------------------------------------------------------ */ \
|
||||||
} pcre2_callout_enumerate_block;
|
} pcre2_callout_enumerate_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_substitute_callout_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SIZE input_offsets[2]; /* Matched portion of the input */ \
|
||||||
|
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_substitute_callout_block;
|
||||||
|
|
||||||
|
|
||||||
/* List the generic forms of all other functions in macros, which will be
|
/* List the generic forms of all other functions in macros, which will be
|
||||||
|
@ -605,6 +613,9 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_callout(pcre2_match_context *, \
|
pcre2_set_callout(pcre2_match_context *, \
|
||||||
int (*)(pcre2_callout_block *, void *), void *); \
|
int (*)(pcre2_callout_block *, void *), void *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||||
|
void (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
@ -808,6 +819,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||||
|
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||||
|
@ -873,6 +885,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||||
|
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -163,11 +163,13 @@ when no context is supplied to a match function. */
|
||||||
const pcre2_match_context PRIV(default_match_context) = {
|
const pcre2_match_context PRIV(default_match_context) = {
|
||||||
{ default_malloc, default_free, NULL },
|
{ default_malloc, default_free, NULL },
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
NULL,
|
NULL, /* JIT callback */
|
||||||
NULL,
|
NULL, /* JIT callback data */
|
||||||
#endif
|
#endif
|
||||||
NULL,
|
NULL, /* Callout function */
|
||||||
NULL,
|
NULL, /* Callout data */
|
||||||
|
NULL, /* Substitute callout function */
|
||||||
|
NULL, /* Substitute callout data */
|
||||||
PCRE2_UNSET, /* Offset limit */
|
PCRE2_UNSET, /* Offset limit */
|
||||||
HEAP_LIMIT,
|
HEAP_LIMIT,
|
||||||
MATCH_LIMIT,
|
MATCH_LIMIT,
|
||||||
|
@ -403,6 +405,16 @@ mcontext->callout_data = callout_data;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||||
|
void (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||||
|
void *substitute_callout_data)
|
||||||
|
{
|
||||||
|
mcontext->substitute_callout = substitute_callout;
|
||||||
|
mcontext->substitute_callout_data = substitute_callout_data;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
{
|
{
|
||||||
|
|
|
@ -585,6 +585,8 @@ typedef struct pcre2_real_match_context {
|
||||||
#endif
|
#endif
|
||||||
int (*callout)(pcre2_callout_block *, void *);
|
int (*callout)(pcre2_callout_block *, void *);
|
||||||
void *callout_data;
|
void *callout_data;
|
||||||
|
void (*substitute_callout)(pcre2_substitute_callout_block *, void *);
|
||||||
|
void *substitute_callout_data;
|
||||||
PCRE2_SIZE offset_limit;
|
PCRE2_SIZE offset_limit;
|
||||||
uint32_t heap_limit;
|
uint32_t heap_limit;
|
||||||
uint32_t match_limit;
|
uint32_t match_limit;
|
||||||
|
|
|
@ -239,7 +239,9 @@ PCRE2_SIZE extra_needed = 0;
|
||||||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||||
PCRE2_SIZE *ovector;
|
PCRE2_SIZE *ovector;
|
||||||
PCRE2_SIZE ovecsave[3];
|
PCRE2_SIZE ovecsave[3];
|
||||||
|
pcre2_substitute_callout_block scb;
|
||||||
|
|
||||||
|
scb.version = 0;
|
||||||
buff_offset = 0;
|
buff_offset = 0;
|
||||||
lengthleft = buff_length = *blength;
|
lengthleft = buff_length = *blength;
|
||||||
*blength = PCRE2_UNSET;
|
*blength = PCRE2_UNSET;
|
||||||
|
@ -390,7 +392,12 @@ do
|
||||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||||
goto EXIT;
|
goto EXIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Save the match point for a possible callout */
|
||||||
|
|
||||||
|
scb.input_offsets[0] = ovector[0];
|
||||||
|
scb.input_offsets[1] = ovector[1];
|
||||||
|
|
||||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||||
real call to this function would ever hit this! */
|
real call to this function would ever hit this! */
|
||||||
|
|
||||||
|
@ -401,11 +408,13 @@ do
|
||||||
}
|
}
|
||||||
subs++;
|
subs++;
|
||||||
|
|
||||||
/* Copy the text leading up to the match. */
|
/* Copy the text leading up to the match, and remember where the insert
|
||||||
|
begins. */
|
||||||
|
|
||||||
if (rc == 0) rc = ovector_count;
|
if (rc == 0) rc = ovector_count;
|
||||||
fraglength = ovector[0] - start_offset;
|
fraglength = ovector[0] - start_offset;
|
||||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||||
|
scb.output_offsets[0] = buff_offset;
|
||||||
|
|
||||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||||
extended mode when backslashes are being interpreted. In extended mode we
|
extended mode when backslashes are being interpreted. In extended mode we
|
||||||
|
@ -821,10 +830,19 @@ do
|
||||||
} /* End handling a literal code unit */
|
} /* End handling a literal code unit */
|
||||||
} /* End of loop for scanning the replacement. */
|
} /* End of loop for scanning the replacement. */
|
||||||
|
|
||||||
/* The replacement has been copied to the output. Save the details of this
|
/* The replacement has been copied to the output, or its size has been
|
||||||
match. See above for how this data is used. If we matched an empty string, do
|
remembered. Do the callout if there is one and we have done an actual
|
||||||
the magic for global matches. Finally, update the start offset to point to
|
replacement. */
|
||||||
the rest of the subject string. */
|
|
||||||
|
if (!overflowed && mcontext->substitute_callout != NULL)
|
||||||
|
{
|
||||||
|
scb.output_offsets[1] = buff_offset;
|
||||||
|
mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Save the details of this match. See above for how this data is used. If we
|
||||||
|
matched an empty string, do the magic for global matches. Finally, update the
|
||||||
|
start offset to point to the rest of the subject string. */
|
||||||
|
|
||||||
ovecsave[0] = ovector[0];
|
ovecsave[0] = ovector[0];
|
||||||
ovecsave[1] = ovector[1];
|
ovecsave[1] = ovector[1];
|
||||||
|
|
104
src/pcre2test.c
104
src/pcre2test.c
|
@ -484,14 +484,15 @@ so many of them that they are split into two fields. */
|
||||||
|
|
||||||
/* Second control word */
|
/* Second control word */
|
||||||
|
|
||||||
#define CTL2_SUBSTITUTE_EXTENDED 0x00000001u
|
#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u
|
||||||
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u
|
#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u
|
||||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
|
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000004u
|
||||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
|
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000008u
|
||||||
#define CTL2_SUBJECT_LITERAL 0x00000010u
|
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000010u
|
||||||
#define CTL2_CALLOUT_NO_WHERE 0x00000020u
|
#define CTL2_SUBJECT_LITERAL 0x00000020u
|
||||||
#define CTL2_CALLOUT_EXTRA 0x00000040u
|
#define CTL2_CALLOUT_NO_WHERE 0x00000040u
|
||||||
#define CTL2_ALLVECTOR 0x00000080u
|
#define CTL2_CALLOUT_EXTRA 0x00000080u
|
||||||
|
#define CTL2_ALLVECTOR 0x00000100u
|
||||||
|
|
||||||
#define CTL2_NL_SET 0x40000000u /* Informational */
|
#define CTL2_NL_SET 0x40000000u /* Informational */
|
||||||
#define CTL2_BSR_SET 0x80000000u /* Informational */
|
#define CTL2_BSR_SET 0x80000000u /* Informational */
|
||||||
|
@ -511,7 +512,8 @@ different things in the two cases. */
|
||||||
CTL_STARTCHAR|\
|
CTL_STARTCHAR|\
|
||||||
CTL_UTF8_INPUT)
|
CTL_UTF8_INPUT)
|
||||||
|
|
||||||
#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
|
#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
|
||||||
|
CTL2_SUBSTITUTE_EXTENDED|\
|
||||||
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
|
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
|
||||||
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
|
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
|
||||||
CTL2_SUBSTITUTE_UNSET_EMPTY|\
|
CTL2_SUBSTITUTE_UNSET_EMPTY|\
|
||||||
|
@ -690,6 +692,7 @@ static modstruct modlist[] = {
|
||||||
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
|
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
|
||||||
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
|
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
|
||||||
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
|
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
|
||||||
|
{ "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
|
||||||
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
|
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
|
||||||
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
|
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
|
||||||
{ "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
|
{ "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
|
||||||
|
@ -1355,6 +1358,17 @@ are supported. */
|
||||||
else \
|
else \
|
||||||
pcre2_set_parens_nest_limit_32(G(a,32),b)
|
pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||||
|
|
||||||
|
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||||
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
pcre2_set_substitute_callout_8(G(a,8), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
|
||||||
|
else if (test_mode == PCRE16_MODE) \
|
||||||
|
pcre2_set_substitute_callout_16(G(a,16), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
|
||||||
|
else \
|
||||||
|
pcre2_set_substitute_callout_32(G(a,32), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
|
||||||
|
|
||||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||||
|
@ -1824,6 +1838,14 @@ the three different cases. */
|
||||||
else \
|
else \
|
||||||
G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
|
G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
|
||||||
|
|
||||||
|
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||||
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
|
G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
|
||||||
|
(void (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
|
||||||
|
else \
|
||||||
|
G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
|
||||||
|
(void (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
|
||||||
|
|
||||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
|
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
|
||||||
|
@ -2025,6 +2047,9 @@ the three different cases. */
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
||||||
|
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||||
|
pcre2_set_substitute_callout_8(G(a,8), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
|
||||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||||
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||||
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
|
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
|
||||||
|
@ -2129,6 +2154,9 @@ the three different cases. */
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
||||||
|
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||||
|
pcre2_set_substitute_callout_16(G(a,16), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
|
||||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||||
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
||||||
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
|
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
|
||||||
|
@ -2221,7 +2249,7 @@ the three different cases. */
|
||||||
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
|
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
|
||||||
r = pcre2_serialize_get_number_of_codes_32(a)
|
r = pcre2_serialize_get_number_of_codes_32(a)
|
||||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||||
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
|
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
|
||||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||||
|
@ -2233,6 +2261,9 @@ the three different cases. */
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||||
|
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||||
|
pcre2_set_substitute_callout_32(G(a,32), \
|
||||||
|
(void (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
|
||||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||||
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||||
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
|
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
|
||||||
|
@ -4022,7 +4053,7 @@ Returns: nothing
|
||||||
static void
|
static void
|
||||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||||
|
@ -4058,6 +4089,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
||||||
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
||||||
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
|
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
|
||||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||||
|
((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
||||||
|
@ -5896,6 +5928,35 @@ return capcount;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Substitute callout function *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Called from pcre2_substitute() when the substitute_callout modifier is set.
|
||||||
|
Print out the data that is passed back. The substitute callout block is
|
||||||
|
identical for all code unit widths, so we just pick one.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
scb pointer to substitute callout block
|
||||||
|
data_ptr callout data
|
||||||
|
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
|
||||||
|
void *data_ptr)
|
||||||
|
{
|
||||||
|
(void)data_ptr; /* Not used */
|
||||||
|
fprintf(outfile, "Old %" SIZ_FORM " %" SIZ_FORM " New %" SIZ_FORM
|
||||||
|
" %" SIZ_FORM "\n",
|
||||||
|
SIZ_CAST scb->input_offsets[0],
|
||||||
|
SIZ_CAST scb->input_offsets[1],
|
||||||
|
SIZ_CAST scb->output_offsets[0],
|
||||||
|
SIZ_CAST scb->output_offsets[1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Callout function *
|
* Callout function *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -5907,8 +5968,11 @@ callout block for different code unit widths are that the pointers to the
|
||||||
subject, the most recent MARK, and a callout argument string point to strings
|
subject, the most recent MARK, and a callout argument string point to strings
|
||||||
of the appropriate width. Casts can be used to deal with this.
|
of the appropriate width. Casts can be used to deal with this.
|
||||||
|
|
||||||
Argument: a pointer to a callout block
|
Arguments:
|
||||||
Return:
|
cb a pointer to a callout block
|
||||||
|
callout_data_ptr the provided callout data
|
||||||
|
|
||||||
|
Returns: 0 or 1 or an error, as determined by settings
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -6779,8 +6843,8 @@ if (pat_patctl.replacement[0] != 0)
|
||||||
return PR_OK;
|
return PR_OK;
|
||||||
}
|
}
|
||||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||||
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
|
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Warn for modifiers that are ignored for DFA. */
|
/* Warn for modifiers that are ignored for DFA. */
|
||||||
|
|
||||||
|
@ -7158,6 +7222,16 @@ if (dat_datctl.replacement[0] != 0)
|
||||||
rlen = PCRE2_ZERO_TERMINATED;
|
rlen = PCRE2_ZERO_TERMINATED;
|
||||||
else
|
else
|
||||||
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
|
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
|
||||||
|
|
||||||
|
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
|
||||||
|
{
|
||||||
|
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
||||||
|
}
|
||||||
|
|
||||||
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
|
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
|
||||||
dat_datctl.options|xoptions, match_data, dat_context,
|
dat_datctl.options|xoptions, match_data, dat_context,
|
||||||
rbuffer, rlen, nbuffer, &nsize);
|
rbuffer, rlen, nbuffer, &nsize);
|
||||||
|
|
|
@ -475,5 +475,10 @@
|
||||||
\x{100}
|
\x{100}
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aaa
|
aaa
|
||||||
|
|
||||||
|
# Offsets are different in 8-bit mode.
|
||||||
|
|
||||||
|
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||||
|
123abcáyzabcdef789abcሴqr
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -381,5 +381,10 @@
|
||||||
\x{100}
|
\x{100}
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aaa
|
aaa
|
||||||
|
|
||||||
|
# Offsets are different in 8-bit mode.
|
||||||
|
|
||||||
|
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||||
|
123abcáyzabcdef789abcሴqr
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -5514,4 +5514,7 @@ a)"xI
|
||||||
abcdef\=ovector=4
|
abcdef\=ovector=4
|
||||||
abxyz\=ovector=4
|
abxyz\=ovector=4
|
||||||
|
|
||||||
|
/a(b)c|xyz/g,replace=<$0>,substitute_callout
|
||||||
|
abcdefabcpqr
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -1625,5 +1625,15 @@ Subject length lower bound = 1
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aaa
|
aaa
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# Offsets are different in 8-bit mode.
|
||||||
|
|
||||||
|
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||||
|
123abcáyzabcdef789abcሴqr
|
||||||
|
Old 6 6 New 6 8
|
||||||
|
Old 13 13 New 15 17
|
||||||
|
Old 13 16 New 17 22
|
||||||
|
Old 22 22 New 28 30
|
||||||
|
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -1470,5 +1470,15 @@ Subject length lower bound = 1
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aaa
|
aaa
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# Offsets are different in 8-bit mode.
|
||||||
|
|
||||||
|
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||||
|
123abcáyzabcdef789abcሴqr
|
||||||
|
Old 6 6 New 6 8
|
||||||
|
Old 12 12 New 14 16
|
||||||
|
Old 12 15 New 16 21
|
||||||
|
Old 21 21 New 27 29
|
||||||
|
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1467,5 +1467,15 @@ Subject length lower bound = 1
|
||||||
\= Expect no match
|
\= Expect no match
|
||||||
aaa
|
aaa
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# Offsets are different in 8-bit mode.
|
||||||
|
|
||||||
|
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||||
|
123abcáyzabcdef789abcሴqr
|
||||||
|
Old 6 6 New 6 8
|
||||||
|
Old 12 12 New 14 16
|
||||||
|
Old 12 15 New 16 21
|
||||||
|
Old 21 21 New 27 29
|
||||||
|
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -16795,6 +16795,12 @@ Subject length lower bound = 1
|
||||||
2: <unchanged>
|
2: <unchanged>
|
||||||
3: <unchanged>
|
3: <unchanged>
|
||||||
|
|
||||||
|
/a(b)c|xyz/g,replace=<$0>,substitute_callout
|
||||||
|
abcdefabcpqr
|
||||||
|
Old 0 3 New 0 5
|
||||||
|
Old 6 9 New 8 13
|
||||||
|
2: <abc>def<abc>pqr
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
|
|
Loading…
Reference in New Issue