Implement callouts from pcre2_substitute().
This commit is contained in:
parent
80adf9d165
commit
a69267246f
|
@ -12,6 +12,8 @@ partial matches.
|
|||
2. Fix subject buffer overread in JIT when UTF is disabled and \X or \R has
|
||||
a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
|
||||
|
||||
3. Added support for callouts from pcre2_substitute().
|
||||
|
||||
|
||||
Version 10.32 10-September-2018
|
||||
-------------------------------
|
||||
|
|
|
@ -85,6 +85,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_parens_nest_limit.html \
|
||||
doc/html/pcre2_set_recursion_limit.html \
|
||||
doc/html/pcre2_set_recursion_memory_management.html \
|
||||
doc/html/pcre2_set_substitute_callout.html \
|
||||
doc/html/pcre2_substitute.html \
|
||||
doc/html/pcre2_substring_copy_byname.html \
|
||||
doc/html/pcre2_substring_copy_bynumber.html \
|
||||
|
@ -178,6 +179,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_parens_nest_limit.3 \
|
||||
doc/pcre2_set_recursion_limit.3 \
|
||||
doc/pcre2_set_recursion_memory_management.3 \
|
||||
doc/pcre2_set_substitute_callout.3 \
|
||||
doc/pcre2_substitute.3 \
|
||||
doc/pcre2_substring_copy_byname.3 \
|
||||
doc/pcre2_substring_copy_bynumber.3 \
|
||||
|
|
|
@ -162,7 +162,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2-config show PCRE2 installation configuration information
|
||||
pcre2api details of PCRE2's native C API
|
||||
pcre2build building PCRE2
|
||||
pcre2callout details of the callout feature
|
||||
pcre2callout details of the pattern callout feature
|
||||
pcre2compat discussion of Perl compatibility
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2demo a demonstration C program that uses PCRE2
|
||||
|
@ -198,7 +198,7 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
|||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 July 2018
|
||||
Last updated: 17 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_substitute_callout specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_substitute_callout man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets the substitute callout fields in a match context (the first
|
||||
argument). The second argument specifies a callout function, and the third
|
||||
argument is an opaque data item that is passed to it. The result of this
|
||||
function is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -182,6 +182,11 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -912,12 +917,23 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
|||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This sets up a "callout" function for PCRE2 to call at specified points
|
||||
This sets up a callout function for PCRE2 to call at specified points
|
||||
during a matching operation. Details are given in the
|
||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation.
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This sets up a callout function for PCRE2 to call after each substitution
|
||||
made by <b>pcre2_substitute()</b>. Details are given in the section entitled
|
||||
"Creating a new string with substitutions"
|
||||
<a href="#substitutions">below.</a>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -3163,26 +3179,30 @@ page, you cannot use names to distinguish the different subpatterns, because
|
|||
names are not included in the compiled code. The matching process uses only
|
||||
numbers. For this reason, the use of different names for subpatterns of the
|
||||
same number causes an error at compile time.
|
||||
</P>
|
||||
<a name="substitutions"></a></P>
|
||||
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *\fIoutputbuffer\zfP,</b>
|
||||
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
||||
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \K item in a lookahead in the pattern causes the match to end before
|
||||
it starts are not supported, and give rise to an error return. For global
|
||||
replacements, matches in which \K in a lookbehind causes the match to start
|
||||
earlier than the point that was reached in the previous iteration are also not
|
||||
supported.
|
||||
string in <i>outputbuffer</i>, replacing one or more parts that were matched
|
||||
with the <i>replacement</i> string, whose length is supplied in <b>rlength</b>.
|
||||
This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
The default is to perform just one replacement, but there is an option that
|
||||
requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
|
||||
</P>
|
||||
<P>
|
||||
Matches in which a \K item in a lookahead in the pattern causes the match to
|
||||
end before it starts are not supported, and give rise to an error return. For
|
||||
global replacements, matches in which \K in a lookbehind causes the match to
|
||||
start earlier than the point that was reached in the previous iteration are
|
||||
also not supported.
|
||||
</P>
|
||||
<P>
|
||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||
|
@ -3194,9 +3214,9 @@ allocate memory for the compiled code.
|
|||
</P>
|
||||
<P>
|
||||
If an external <i>match_data</i> block is provided, its contents afterwards
|
||||
are those set by the final call to <b>pcre2_match()</b>, which will have
|
||||
ended in a matching error. The contents of the ovector within the match data
|
||||
block may or may not have been changed.
|
||||
are those set by the final call to <b>pcre2_match()</b>. For global changes,
|
||||
this will have ended in a matching error. The contents of the ovector within
|
||||
the match data block may or may not have been changed.
|
||||
</P>
|
||||
<P>
|
||||
The <i>outlengthptr</i> argument must point to a variable that contains the
|
||||
|
@ -3220,12 +3240,12 @@ length is in code units, not bytes.
|
|||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||
dollar character is an escape character that can specify the insertion of
|
||||
characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
|
||||
pattern. The following forms are always recognized:
|
||||
characters from capturing groups or names from (*MARK) or other control verbs
|
||||
in the pattern. The following forms are always recognized:
|
||||
<pre>
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
$*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
|
||||
$*MARK or ${*MARK} insert a control verb name
|
||||
</pre>
|
||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||
required only if the following character would be interpreted as part of the
|
||||
|
@ -3234,12 +3254,13 @@ For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
|||
string "+$1$0$1+", the result is "=+babcb+=".
|
||||
</P>
|
||||
<P>
|
||||
$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
|
||||
on the matching path that has a name. (*MARK) must always include a name, but
|
||||
(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
|
||||
the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
|
||||
This facility can be used to perform simple simultaneous substitutions, as this
|
||||
<b>pcre2test</b> example shows:
|
||||
$*MARK inserts the name from the last encountered (*ACCEPT), (*COMMIT),
|
||||
(*MARK), (*PRUNE), or (*THEN) on the matching path that has a name. (*MARK)
|
||||
must always include a name, but the other verbs need not. For example, in
|
||||
the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
|
||||
(*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be used to
|
||||
perform simple simultaneous substitutions, as this <b>pcre2test</b> example
|
||||
shows:
|
||||
<pre>
|
||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||
apple lemon
|
||||
|
@ -3399,6 +3420,44 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
|
|||
"Obtaining a textual error message"
|
||||
<a href="#geterrormessage">above).</a>
|
||||
</P>
|
||||
<br><b>
|
||||
Substitution callouts
|
||||
</b><br>
|
||||
<P>
|
||||
<b>int pcre2_set_substitute_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> void (*<i>callout_function</i>)(pcre2_substitute_callout_block *, void *),</b>
|
||||
<b> void *<i>callout_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <b>pcre2_set_substitute_callout()</b> function can be used to specify a
|
||||
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
||||
a match context. The callout function is called after each substitution. It is
|
||||
not called for simulated substitutions that happen as a result of the
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. A callout function should not return
|
||||
any value.
|
||||
</P>
|
||||
<P>
|
||||
The first argument of the callout function is a pointer to a substitute callout
|
||||
block structure, which contains the following fields, not necessarily in this
|
||||
order:
|
||||
<pre>
|
||||
uint32_t <i>version</i>;
|
||||
PCRE2_SIZE <i>input_offsets[2]</i>;
|
||||
PCRE2_SIZE <i>output_offsets[2]</i>;
|
||||
</pre>
|
||||
The <i>version</i> field contains the version number of the block format. The
|
||||
current version is 0. The version number will increase in future if more fields
|
||||
are added, but the intention is never to remove any of the existing fields.
|
||||
</P>
|
||||
<P>
|
||||
The <i>input_offsets</i> vector contains the code unit offsets in the input
|
||||
string of the matched substring, and the <i>output_offsets</i> vector contains
|
||||
the offsets of the replacement in the output string.
|
||||
</P>
|
||||
<P>
|
||||
The second argument of the callout function is the value passed as
|
||||
<i>callout_data</i> when the function was registered.
|
||||
</P>
|
||||
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
|
@ -3665,7 +3724,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 07 September 2018
|
||||
Last updated: 18 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -44,6 +44,14 @@ a match context (see <b>pcre2_set_callout()</b> in the
|
|||
documentation).
|
||||
</P>
|
||||
<P>
|
||||
When using the <b>pcre2_substitute()</b> function, an additional callout feature
|
||||
is available. This does a callout after each change to the subject string and
|
||||
is described in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
pattern matching.
|
||||
</P>
|
||||
<P>
|
||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||
function is to be called. Different callout points can be identified by putting
|
||||
a number less than 256 after the letter C. The default value is zero.
|
||||
|
@ -463,7 +471,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 26 April 2018
|
||||
Last updated: 17 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1041,6 +1041,7 @@ process.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text
|
||||
altglobal alternative global matching
|
||||
/g global global matching
|
||||
|
@ -1048,6 +1049,7 @@ process.
|
|||
mark show mark values
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1185,6 +1187,7 @@ pattern.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
|
@ -1214,6 +1217,7 @@ pattern.
|
|||
replace=<string> specify a replacement string
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1281,10 +1285,28 @@ captured parentheses be output after a match. By default, only those up to the
|
|||
highest one actually used in the match are output (corresponding to the return
|
||||
code from <b>pcre2_match()</b>). Groups that did not take part in the match
|
||||
are output as "<unset>". This modifier is not relevant for DFA matching (which
|
||||
does no capturing); it is ignored, with a warning message, if present.
|
||||
does no capturing) and does not apply when <b>replace</b> is specified; it is
|
||||
ignored, with a warning message, if present.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing callouts
|
||||
Showing the entire ovector, for all outcomes
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>allvector</b> modifier requests that the entire ovector be shown,
|
||||
whatever the outcome of the match. Compare <b>allcaptures</b>, which shows only
|
||||
up to the maximum number of capture groups for the pattern, and then only for a
|
||||
successful complete non-DFA match. This modifier, which acts after any match
|
||||
result, and also for DFA matching, provides a means of checking that there are
|
||||
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||
ovector is filled with a special value, and if this is found in both elements
|
||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
ovector that is used depends on the number of matches that were found.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing pattern callouts
|
||||
</b><br>
|
||||
<P>
|
||||
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
||||
|
@ -1292,6 +1314,9 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
|
|||
controlled by various modifiers listed above whose names begin with
|
||||
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
||||
<a href="#callouts">below.</a>
|
||||
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
||||
"Testing the substitution function"
|
||||
<a href="#substitution">below.</a>
|
||||
</P>
|
||||
<br><b>
|
||||
Finding all matches in a string
|
||||
|
@ -1343,7 +1368,7 @@ instead of a colon. This is in addition to the normal full list. The string
|
|||
length (that is, the return from the extraction function) is given in
|
||||
parentheses after each substring, followed by the name when the extraction was
|
||||
by name.
|
||||
</P>
|
||||
<a name="substitution"></a></P>
|
||||
<br><b>
|
||||
Testing the substitution function
|
||||
</b><br>
|
||||
|
@ -1384,6 +1409,16 @@ simple example of a substitution test:
|
|||
=abc=abc=\=global
|
||||
2: =xxx=xxx=
|
||||
</pre>
|
||||
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
||||
function is set up. When it is called (after each substitution), the offsets in
|
||||
the input and output strings are output. For example:
|
||||
<pre>
|
||||
/abc/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
Old 0 3 New 0 5
|
||||
Old 6 9 New 8 13
|
||||
2: <abc>def<abc>pqr
|
||||
</pre>
|
||||
Subject and replacement strings should be kept relatively short (fewer than 256
|
||||
characters) for substitution tests, as fixed-size buffers are used. To make it
|
||||
easy to test for buffer overflow, if the replacement string starts with a
|
||||
|
@ -1401,10 +1436,10 @@ The default action of <b>pcre2_substitute()</b> is to return
|
|||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
||||
<b>substitute_overflow_length</b> modifier), <b>pcre2_substitute()</b> continues
|
||||
to go through the motions of matching and substituting, in order to compute the
|
||||
size of buffer that is required. When this happens, <b>pcre2test</b> shows the
|
||||
required buffer length (which includes space for the trailing zero) as part of
|
||||
the error message. For example:
|
||||
to go through the motions of matching and substituting (but not doing any
|
||||
callouts), in order to compute the size of buffer that is required. When this
|
||||
happens, <b>pcre2test</b> shows the required buffer length (which includes space
|
||||
for the trailing zero) as part of the error message. For example:
|
||||
<pre>
|
||||
/abc/substitute_overflow_length
|
||||
123abc123\=replace=[9]XYZ
|
||||
|
@ -2004,7 +2039,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 21 July 2018
|
||||
Last updated: 17 September 2018
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2 3 "11 July 2018" "PCRE2 10.32"
|
||||
.TH PCRE2 3 "17 September 2018" "PCRE2 10.33"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH INTRODUCTION
|
||||
|
@ -156,7 +156,7 @@ listing), and the short pages for individual functions, are concatenated in
|
|||
pcre2-config show PCRE2 installation configuration information
|
||||
pcre2api details of PCRE2's native C API
|
||||
pcre2build building PCRE2
|
||||
pcre2callout details of the callout feature
|
||||
pcre2callout details of the pattern callout feature
|
||||
pcre2compat discussion of Perl compatibility
|
||||
pcre2convert details of pattern conversion functions
|
||||
pcre2demo a demonstration C program that uses PCRE2
|
||||
|
@ -197,6 +197,6 @@ use my two initials, followed by the two digits 10, at the domain cam.ac.uk.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 11 July 2018
|
||||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
608
doc/pcre2.txt
608
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,31 @@
|
|||
.TH PCRE2_SET_SUBSTITUTE_CALLOUT 3 "17 September 2018" "PCRE2 10.33"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||
.B " void *\fIcallout_data\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function sets the substitute callout fields in a match context (the first
|
||||
argument). The second argument specifies a callout function, and the third
|
||||
argument is an opaque data item that is passed to it. The result of this
|
||||
function is always zero.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
107
doc/pcre2api.3
107
doc/pcre2api.3
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "07 September 2018" "PCRE2 10.32"
|
||||
.TH PCRE2API 3 "18 September 2018" "PCRE2 10.33"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -123,6 +123,10 @@ document for an overview of all the PCRE2 documentation.
|
|||
.B " int (*\fIcallout_function\fP)(pcre2_callout_block *, void *),"
|
||||
.B " void *\fIcallout_data\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||
.B " void *\fIcallout_data\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.sp
|
||||
|
@ -847,7 +851,7 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
|||
.B " void *\fIcallout_data\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This sets up a "callout" function for PCRE2 to call at specified points
|
||||
This sets up a callout function for PCRE2 to call at specified points
|
||||
during a matching operation. Details are given in the
|
||||
.\" HREF
|
||||
\fBpcre2callout\fP
|
||||
|
@ -855,6 +859,20 @@ during a matching operation. Details are given in the
|
|||
documentation.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||
.B " void *\fIcallout_data\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This sets up a callout function for PCRE2 to call after each substitution
|
||||
made by \fBpcre2_substitute()\fP. Details are given in the section entitled
|
||||
"Creating a new string with substitutions"
|
||||
.\" HTML <a href="#substitutions">
|
||||
.\" </a>
|
||||
below.
|
||||
.\"
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.fi
|
||||
|
@ -3171,6 +3189,7 @@ numbers. For this reason, the use of different names for subpatterns of the
|
|||
same number causes an error at compile time.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="substitutions"></a>
|
||||
.SH "CREATING A NEW STRING WITH SUBSTITUTIONS"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -3179,19 +3198,22 @@ same number causes an error at compile time.
|
|||
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\zfP,"
|
||||
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||
.fi
|
||||
.P
|
||||
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
||||
\fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This can
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \eK item in a lookahead in the pattern causes the match to end before
|
||||
it starts are not supported, and give rise to an error return. For global
|
||||
replacements, matches in which \eK in a lookbehind causes the match to start
|
||||
earlier than the point that was reached in the previous iteration are also not
|
||||
supported.
|
||||
string in \fIoutputbuffer\fP, replacing one or more parts that were matched
|
||||
with the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP.
|
||||
This can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
The default is to perform just one replacement, but there is an option that
|
||||
requests multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below for details).
|
||||
.P
|
||||
Matches in which a \eK item in a lookahead in the pattern causes the match to
|
||||
end before it starts are not supported, and give rise to an error return. For
|
||||
global replacements, matches in which \eK in a lookbehind causes the match to
|
||||
start earlier than the point that was reached in the previous iteration are
|
||||
also not supported.
|
||||
.P
|
||||
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||
|
@ -3201,9 +3223,9 @@ functions from the match context, if provided, or else those that were used to
|
|||
allocate memory for the compiled code.
|
||||
.P
|
||||
If an external \fImatch_data\fP block is provided, its contents afterwards
|
||||
are those set by the final call to \fBpcre2_match()\fP, which will have
|
||||
ended in a matching error. The contents of the ovector within the match data
|
||||
block may or may not have been changed.
|
||||
are those set by the final call to \fBpcre2_match()\fP. For global changes,
|
||||
this will have ended in a matching error. The contents of the ovector within
|
||||
the match data block may or may not have been changed.
|
||||
.P
|
||||
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
||||
length, in code units, of the output buffer. If the function is successful, the
|
||||
|
@ -3224,12 +3246,12 @@ length is in code units, not bytes.
|
|||
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||
dollar character is an escape character that can specify the insertion of
|
||||
characters from capturing groups or (*MARK), (*PRUNE), or (*THEN) items in the
|
||||
pattern. The following forms are always recognized:
|
||||
characters from capturing groups or names from (*MARK) or other control verbs
|
||||
in the pattern. The following forms are always recognized:
|
||||
.sp
|
||||
$$ insert a dollar character
|
||||
$<n> or ${<n>} insert the contents of group <n>
|
||||
$*MARK or ${*MARK} insert a (*MARK), (*PRUNE), or (*THEN) name
|
||||
$*MARK or ${*MARK} insert a control verb name
|
||||
.sp
|
||||
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||
required only if the following character would be interpreted as part of the
|
||||
|
@ -3237,12 +3259,13 @@ number or name. The number may be zero to include the entire matched string.
|
|||
For example, if the pattern a(b)c is matched with "=abc=" and the replacement
|
||||
string "+$1$0$1+", the result is "=+babcb+=".
|
||||
.P
|
||||
$*MARK inserts the name from the last encountered (*MARK), (*PRUNE), or (*THEN)
|
||||
on the matching path that has a name. (*MARK) must always include a name, but
|
||||
(*PRUNE) and (*THEN) need not. For example, in the case of (*MARK:A)(*PRUNE)
|
||||
the name inserted is "A", but for (*MARK:A)(*PRUNE:B) the relevant name is "B".
|
||||
This facility can be used to perform simple simultaneous substitutions, as this
|
||||
\fBpcre2test\fP example shows:
|
||||
$*MARK inserts the name from the last encountered (*ACCEPT), (*COMMIT),
|
||||
(*MARK), (*PRUNE), or (*THEN) on the matching path that has a name. (*MARK)
|
||||
must always include a name, but the other verbs need not. For example, in
|
||||
the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for
|
||||
(*MARK:A)(*PRUNE:B) the relevant name is "B". This facility can be used to
|
||||
perform simple simultaneous substitutions, as this \fBpcre2test\fP example
|
||||
shows:
|
||||
.sp
|
||||
/(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK}
|
||||
apple lemon
|
||||
|
@ -3388,6 +3411,42 @@ above).
|
|||
.\"
|
||||
.
|
||||
.
|
||||
.SS "Substitution callouts"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_substitute_callout(pcre2_match_context *\fImcontext\fP,
|
||||
.B " void (*\fIcallout_function\fP)(pcre2_substitute_callout_block *, void *),"
|
||||
.B " void *\fIcallout_data\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The \fBpcre2_set_substitute_callout()\fP function can be used to specify a
|
||||
callout function for \fBpcre2_substitute()\fP. This information is passed in
|
||||
a match context. The callout function is called after each substitution. It is
|
||||
not called for simulated substitutions that happen as a result of the
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. A callout function should not return
|
||||
any value.
|
||||
.P
|
||||
The first argument of the callout function is a pointer to a substitute callout
|
||||
block structure, which contains the following fields, not necessarily in this
|
||||
order:
|
||||
.sp
|
||||
uint32_t \fIversion\fP;
|
||||
PCRE2_SIZE \fIinput_offsets[2]\fP;
|
||||
PCRE2_SIZE \fIoutput_offsets[2]\fP;
|
||||
.sp
|
||||
The \fIversion\fP field contains the version number of the block format. The
|
||||
current version is 0. The version number will increase in future if more fields
|
||||
are added, but the intention is never to remove any of the existing fields.
|
||||
.P
|
||||
The \fIinput_offsets\fP vector contains the code unit offsets in the input
|
||||
string of the matched substring, and the \fIoutput_offsets\fP vector contains
|
||||
the offsets of the replacement in the output string.
|
||||
.P
|
||||
The second argument of the callout function is the value passed as
|
||||
\fIcallout_data\fP when the function was registered.
|
||||
.
|
||||
.
|
||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -3670,6 +3729,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 07 September 2018
|
||||
Last updated: 18 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2CALLOUT 3 "26 April 2018" "PCRE2 10.32"
|
||||
.TH PCRE2CALLOUT 3 "17 September 2018" "PCRE2 10.33"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -27,6 +27,15 @@ a match context (see \fBpcre2_set_callout()\fP in the
|
|||
.\"
|
||||
documentation).
|
||||
.P
|
||||
When using the \fBpcre2_substitute()\fP function, an additional callout feature
|
||||
is available. This does a callout after each change to the subject string and
|
||||
is described in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
pattern matching.
|
||||
.P
|
||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||
function is to be called. Different callout points can be identified by putting
|
||||
a number less than 256 after the letter C. The default value is zero.
|
||||
|
@ -443,6 +452,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 26 April 2018
|
||||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "15 September 2018" "PCRE 10.33"
|
||||
.TH PCRE2TEST 1 "17 September 2018" "PCRE 10.33"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -1011,6 +1011,7 @@ process.
|
|||
mark show mark values
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1185,6 +1186,7 @@ pattern.
|
|||
replace=<string> specify a replacement string
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1271,7 +1273,7 @@ elements are the only ones that should be set. After a DFA match, the amount of
|
|||
ovector that is used depends on the number of matches that were found.
|
||||
.
|
||||
.
|
||||
.SS "Testing callouts"
|
||||
.SS "Testing pattern callouts"
|
||||
.rs
|
||||
.sp
|
||||
A callout function is supplied when \fBpcre2test\fP calls the library matching
|
||||
|
@ -1280,7 +1282,13 @@ controlled by various modifiers listed above whose names begin with
|
|||
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
||||
.\" HTML <a href="#callouts">
|
||||
.\" </a>
|
||||
below.
|
||||
below.
|
||||
.\"
|
||||
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
||||
"Testing the substitution function"
|
||||
.\" HTML <a href="#substitution">
|
||||
.\" </a>
|
||||
below.
|
||||
.\"
|
||||
.
|
||||
.
|
||||
|
@ -1332,6 +1340,7 @@ parentheses after each substring, followed by the name when the extraction was
|
|||
by name.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="substitution"></a>
|
||||
.SS "Testing the substitution function"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1367,6 +1376,16 @@ simple example of a substitution test:
|
|||
=abc=abc=\e=global
|
||||
2: =xxx=xxx=
|
||||
.sp
|
||||
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
||||
function is set up. When it is called (after each substitution), the offsets in
|
||||
the input and output strings are output. For example:
|
||||
.sp
|
||||
/abc/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
Old 0 3 New 0 5
|
||||
Old 6 9 New 8 13
|
||||
2: <abc>def<abc>pqr
|
||||
.sp
|
||||
Subject and replacement strings should be kept relatively short (fewer than 256
|
||||
characters) for substitution tests, as fixed-size buffers are used. To make it
|
||||
easy to test for buffer overflow, if the replacement string starts with a
|
||||
|
@ -1384,10 +1403,10 @@ The default action of \fBpcre2_substitute()\fP is to return
|
|||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if the
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the
|
||||
\fBsubstitute_overflow_length\fP modifier), \fBpcre2_substitute()\fP continues
|
||||
to go through the motions of matching and substituting, in order to compute the
|
||||
size of buffer that is required. When this happens, \fBpcre2test\fP shows the
|
||||
required buffer length (which includes space for the trailing zero) as part of
|
||||
the error message. For example:
|
||||
to go through the motions of matching and substituting (but not doing any
|
||||
callouts), in order to compute the size of buffer that is required. When this
|
||||
happens, \fBpcre2test\fP shows the required buffer length (which includes space
|
||||
for the trailing zero) as part of the error message. For example:
|
||||
.sp
|
||||
/abc/substitute_overflow_length
|
||||
123abc123\e=replace=[9]XYZ
|
||||
|
@ -2002,6 +2021,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 15 September 2018
|
||||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -929,6 +929,7 @@ PATTERN MODIFIERS
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text
|
||||
altglobal alternative global matching
|
||||
/g global global matching
|
||||
|
@ -936,6 +937,7 @@ PATTERN MODIFIERS
|
|||
mark show mark values
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1057,6 +1059,7 @@ SUBJECT MODIFIERS
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
|
@ -1086,6 +1089,7 @@ SUBJECT MODIFIERS
|
|||
replace=<string> specify a replacement string
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1150,76 +1154,95 @@ SUBJECT MODIFIERS
|
|||
the highest one actually used in the match are output (corresponding to
|
||||
the return code from pcre2_match()). Groups that did not take part in
|
||||
the match are output as "<unset>". This modifier is not relevant for
|
||||
DFA matching (which does no capturing); it is ignored, with a warning
|
||||
message, if present.
|
||||
DFA matching (which does no capturing) and does not apply when replace
|
||||
is specified; it is ignored, with a warning message, if present.
|
||||
|
||||
Testing callouts
|
||||
Showing the entire ovector, for all outcomes
|
||||
|
||||
A callout function is supplied when pcre2test calls the library match-
|
||||
ing functions, unless callout_none is specified. Its behaviour can be
|
||||
controlled by various modifiers listed above whose names begin with
|
||||
callout_. Details are given in the section entitled "Callouts" below.
|
||||
The allvector modifier requests that the entire ovector be shown, what-
|
||||
ever the outcome of the match. Compare allcaptures, which shows only up
|
||||
to the maximum number of capture groups for the pattern, and then only
|
||||
for a successful complete non-DFA match. This modifier, which acts
|
||||
after any match result, and also for DFA matching, provides a means of
|
||||
checking that there are no unexpected modifications to ovector fields.
|
||||
Before each match attempt, the ovector is filled with a special value,
|
||||
and if this is found in both elements of a capturing pair,
|
||||
"<unchanged>" is output. After a successful match, this applies to all
|
||||
groups after the maximum capture group for the pattern. In other cases
|
||||
it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the
|
||||
amount of ovector that is used depends on the number of matches that
|
||||
were found.
|
||||
|
||||
Testing pattern callouts
|
||||
|
||||
A callout function is supplied when pcre2test calls the library match-
|
||||
ing functions, unless callout_none is specified. Its behaviour can be
|
||||
controlled by various modifiers listed above whose names begin with
|
||||
callout_. Details are given in the section entitled "Callouts" below.
|
||||
Testing callouts from pcre2_substitute() is described separately in
|
||||
"Testing the substitution function" below.
|
||||
|
||||
Finding all matches in a string
|
||||
|
||||
Searching for all possible matches within a subject can be requested by
|
||||
the global or altglobal modifier. After finding a match, the matching
|
||||
function is called again to search the remainder of the subject. The
|
||||
difference between global and altglobal is that the former uses the
|
||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||
searching at a new point within the entire string (which is what Perl
|
||||
the global or altglobal modifier. After finding a match, the matching
|
||||
function is called again to search the remainder of the subject. The
|
||||
difference between global and altglobal is that the former uses the
|
||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||
searching at a new point within the entire string (which is what Perl
|
||||
does), whereas the latter passes over a shortened subject. This makes a
|
||||
difference to the matching process if the pattern begins with a lookbe-
|
||||
hind assertion (including \b or \B).
|
||||
|
||||
If an empty string is matched, the next match is done with the
|
||||
If an empty string is matched, the next match is done with the
|
||||
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
||||
for another, non-empty, match at the same point in the subject. If this
|
||||
match fails, the start offset is advanced, and the normal match is
|
||||
retried. This imitates the way Perl handles such cases when using the
|
||||
/g modifier or the split() function. Normally, the start offset is
|
||||
advanced by one character, but if the newline convention recognizes
|
||||
CRLF as a newline, and the current character is CR followed by LF, an
|
||||
match fails, the start offset is advanced, and the normal match is
|
||||
retried. This imitates the way Perl handles such cases when using the
|
||||
/g modifier or the split() function. Normally, the start offset is
|
||||
advanced by one character, but if the newline convention recognizes
|
||||
CRLF as a newline, and the current character is CR followed by LF, an
|
||||
advance of two characters occurs.
|
||||
|
||||
Testing substring extraction functions
|
||||
|
||||
The copy and get modifiers can be used to test the pcre2_sub-
|
||||
The copy and get modifiers can be used to test the pcre2_sub-
|
||||
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
|
||||
given more than once, and each can specify a group name or number, for
|
||||
given more than once, and each can specify a group name or number, for
|
||||
example:
|
||||
|
||||
abcd\=copy=1,copy=3,get=G1
|
||||
|
||||
If the #subject command is used to set default copy and/or get lists,
|
||||
these can be unset by specifying a negative number to cancel all num-
|
||||
If the #subject command is used to set default copy and/or get lists,
|
||||
these can be unset by specifying a negative number to cancel all num-
|
||||
bered groups and an empty name to cancel all named groups.
|
||||
|
||||
The getall modifier tests pcre2_substring_list_get(), which extracts
|
||||
The getall modifier tests pcre2_substring_list_get(), which extracts
|
||||
all captured substrings.
|
||||
|
||||
If the subject line is successfully matched, the substrings extracted
|
||||
by the convenience functions are output with C, G, or L after the
|
||||
string number instead of a colon. This is in addition to the normal
|
||||
full list. The string length (that is, the return from the extraction
|
||||
If the subject line is successfully matched, the substrings extracted
|
||||
by the convenience functions are output with C, G, or L after the
|
||||
string number instead of a colon. This is in addition to the normal
|
||||
full list. The string length (that is, the return from the extraction
|
||||
function) is given in parentheses after each substring, followed by the
|
||||
name when the extraction was by name.
|
||||
|
||||
Testing the substitution function
|
||||
|
||||
If the replace modifier is set, the pcre2_substitute() function is
|
||||
called instead of one of the matching functions. Note that replacement
|
||||
strings cannot contain commas, because a comma signifies the end of a
|
||||
If the replace modifier is set, the pcre2_substitute() function is
|
||||
called instead of one of the matching functions. Note that replacement
|
||||
strings cannot contain commas, because a comma signifies the end of a
|
||||
modifier. This is not thought to be an issue in a test program.
|
||||
|
||||
Unlike subject strings, pcre2test does not process replacement strings
|
||||
for escape sequences. In UTF mode, a replacement string is checked to
|
||||
see if it is a valid UTF-8 string. If so, it is correctly converted to
|
||||
a UTF string of the appropriate code unit width. If it is not a valid
|
||||
UTF-8 string, the individual code units are copied directly. This pro-
|
||||
Unlike subject strings, pcre2test does not process replacement strings
|
||||
for escape sequences. In UTF mode, a replacement string is checked to
|
||||
see if it is a valid UTF-8 string. If so, it is correctly converted to
|
||||
a UTF string of the appropriate code unit width. If it is not a valid
|
||||
UTF-8 string, the individual code units are copied directly. This pro-
|
||||
vides a means of passing an invalid UTF-8 string for testing purposes.
|
||||
|
||||
The following modifiers set options (in additional to the normal match
|
||||
The following modifiers set options (in addition to the normal match
|
||||
options) for pcre2_substitute():
|
||||
|
||||
global PCRE2_SUBSTITUTE_GLOBAL
|
||||
|
@ -1229,8 +1252,8 @@ SUBJECT MODIFIERS
|
|||
substitute_unset_empty PCRE2_SUBSTITUTE_UNSET_EMPTY
|
||||
|
||||
|
||||
After a successful substitution, the modified string is output, pre-
|
||||
ceded by the number of replacements. This may be zero if there were no
|
||||
After a successful substitution, the modified string is output, pre-
|
||||
ceded by the number of replacements. This may be zero if there were no
|
||||
matches. Here is a simple example of a substitution test:
|
||||
|
||||
/abc/replace=xxx
|
||||
|
@ -1239,12 +1262,22 @@ SUBJECT MODIFIERS
|
|||
=abc=abc=\=global
|
||||
2: =xxx=xxx=
|
||||
|
||||
Subject and replacement strings should be kept relatively short (fewer
|
||||
than 256 characters) for substitution tests, as fixed-size buffers are
|
||||
used. To make it easy to test for buffer overflow, if the replacement
|
||||
string starts with a number in square brackets, that number is passed
|
||||
to pcre2_substitute() as the size of the output buffer, with the
|
||||
replacement string starting at the next character. Here is an example
|
||||
If the substitute_callout modifier is set, a substitution callout func-
|
||||
tion is set up. When it is called (after each substitution), the off-
|
||||
sets in the input and output strings are output. For example:
|
||||
|
||||
/abc/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
Old 0 3 New 0 5
|
||||
Old 6 9 New 8 13
|
||||
2: <abc>def<abc>pqr
|
||||
|
||||
Subject and replacement strings should be kept relatively short (fewer
|
||||
than 256 characters) for substitution tests, as fixed-size buffers are
|
||||
used. To make it easy to test for buffer overflow, if the replacement
|
||||
string starts with a number in square brackets, that number is passed
|
||||
to pcre2_substitute() as the size of the output buffer, with the
|
||||
replacement string starting at the next character. Here is an example
|
||||
that tests the edge case:
|
||||
|
||||
/abc/
|
||||
|
@ -1253,14 +1286,15 @@ SUBJECT MODIFIERS
|
|||
123abc123\=replace=[9]XYZ
|
||||
Failed: error -47: no more memory
|
||||
|
||||
The default action of pcre2_substitute() is to return
|
||||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
|
||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
|
||||
stitute_overflow_length modifier), pcre2_substitute() continues to go
|
||||
through the motions of matching and substituting, in order to compute
|
||||
the size of buffer that is required. When this happens, pcre2test shows
|
||||
the required buffer length (which includes space for the trailing zero)
|
||||
as part of the error message. For example:
|
||||
The default action of pcre2_substitute() is to return
|
||||
PCRE2_ERROR_NOMEMORY when the output buffer is too small. However, if
|
||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set (by using the sub-
|
||||
stitute_overflow_length modifier), pcre2_substitute() continues to go
|
||||
through the motions of matching and substituting (but not doing any
|
||||
callouts), in order to compute the size of buffer that is required.
|
||||
When this happens, pcre2test shows the required buffer length (which
|
||||
includes space for the trailing zero) as part of the error message. For
|
||||
example:
|
||||
|
||||
/abc/substitute_overflow_length
|
||||
123abc123\=replace=[9]XYZ
|
||||
|
@ -1818,5 +1852,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 21 July 2018
|
||||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
|
|
|
@ -505,10 +505,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
|
|||
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
|
||||
|
||||
|
||||
/* The structure for passing out data via the pcre_callout_function. We use a
|
||||
structure so that new fields can be added on the end in future versions,
|
||||
without changing the API of the function, thereby allowing old clients to work
|
||||
without modification. Define the generic version in a macro; the width-specific
|
||||
/* The structures for passing out data via callout functions. We use structures
|
||||
so that new fields can be added on the end in future versions, without changing
|
||||
the API of the function, thereby allowing old clients to work without
|
||||
modification. Define the generic versions in a macro; the width-specific
|
||||
versions are generated from this macro below. */
|
||||
|
||||
/* Flags for the callout_flags field. These are cleared after a callout. */
|
||||
|
@ -550,7 +550,15 @@ typedef struct pcre2_callout_enumerate_block { \
|
|||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_enumerate_block;
|
||||
} pcre2_callout_enumerate_block; \
|
||||
\
|
||||
typedef struct pcre2_substitute_callout_block { \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
PCRE2_SIZE input_offsets[2]; /* Matched portion of the input */ \
|
||||
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_substitute_callout_block;
|
||||
|
||||
|
||||
/* List the generic forms of all other functions in macros, which will be
|
||||
|
@ -605,6 +613,9 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
|||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_substitute_callout(pcre2_match_context *, \
|
||||
void (*)(pcre2_substitute_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
|
@ -808,6 +819,7 @@ pcre2_compile are called by application code. */
|
|||
|
||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
|
||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
|
||||
|
@ -873,6 +885,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
|
|
|
@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
|||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2017 University of Cambridge
|
||||
New API code Copyright (c) 2016-2018 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
|
@ -163,11 +163,13 @@ when no context is supplied to a match function. */
|
|||
const pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef SUPPORT_JIT
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* JIT callback */
|
||||
NULL, /* JIT callback data */
|
||||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
NULL, /* Callout function */
|
||||
NULL, /* Callout data */
|
||||
NULL, /* Substitute callout function */
|
||||
NULL, /* Substitute callout data */
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
|
@ -403,6 +405,16 @@ mcontext->callout_data = callout_data;
|
|||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||
void (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
void *substitute_callout_data)
|
||||
{
|
||||
mcontext->substitute_callout = substitute_callout;
|
||||
mcontext->substitute_callout_data = substitute_callout_data;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
|
|
|
@ -585,6 +585,8 @@ typedef struct pcre2_real_match_context {
|
|||
#endif
|
||||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
void (*substitute_callout)(pcre2_substitute_callout_block *, void *);
|
||||
void *substitute_callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t heap_limit;
|
||||
uint32_t match_limit;
|
||||
|
|
|
@ -239,7 +239,9 @@ PCRE2_SIZE extra_needed = 0;
|
|||
PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[3];
|
||||
pcre2_substitute_callout_block scb;
|
||||
|
||||
scb.version = 0;
|
||||
buff_offset = 0;
|
||||
lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
|
@ -390,7 +392,12 @@ do
|
|||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
|
||||
/* Save the match point for a possible callout */
|
||||
|
||||
scb.input_offsets[0] = ovector[0];
|
||||
scb.input_offsets[1] = ovector[1];
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
|
@ -401,11 +408,13 @@ do
|
|||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match. */
|
||||
/* Copy the text leading up to the match, and remember where the insert
|
||||
begins. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
scb.output_offsets[0] = buff_offset;
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
|
@ -821,10 +830,19 @@ do
|
|||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output. Save the details of this
|
||||
match. See above for how this data is used. If we matched an empty string, do
|
||||
the magic for global matches. Finally, update the start offset to point to
|
||||
the rest of the subject string. */
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Do the callout if there is one and we have done an actual
|
||||
replacement. */
|
||||
|
||||
if (!overflowed && mcontext->substitute_callout != NULL)
|
||||
{
|
||||
scb.output_offsets[1] = buff_offset;
|
||||
mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||
}
|
||||
|
||||
/* Save the details of this match. See above for how this data is used. If we
|
||||
matched an empty string, do the magic for global matches. Finally, update the
|
||||
start offset to point to the rest of the subject string. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
|
|
104
src/pcre2test.c
104
src/pcre2test.c
|
@ -484,14 +484,15 @@ so many of them that they are split into two fields. */
|
|||
|
||||
/* Second control word */
|
||||
|
||||
#define CTL2_SUBSTITUTE_EXTENDED 0x00000001u
|
||||
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000002u
|
||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
|
||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
|
||||
#define CTL2_SUBJECT_LITERAL 0x00000010u
|
||||
#define CTL2_CALLOUT_NO_WHERE 0x00000020u
|
||||
#define CTL2_CALLOUT_EXTRA 0x00000040u
|
||||
#define CTL2_ALLVECTOR 0x00000080u
|
||||
#define CTL2_SUBSTITUTE_CALLOUT 0x00000001u
|
||||
#define CTL2_SUBSTITUTE_EXTENDED 0x00000002u
|
||||
#define CTL2_SUBSTITUTE_OVERFLOW_LENGTH 0x00000004u
|
||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000008u
|
||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000010u
|
||||
#define CTL2_SUBJECT_LITERAL 0x00000020u
|
||||
#define CTL2_CALLOUT_NO_WHERE 0x00000040u
|
||||
#define CTL2_CALLOUT_EXTRA 0x00000080u
|
||||
#define CTL2_ALLVECTOR 0x00000100u
|
||||
|
||||
#define CTL2_NL_SET 0x40000000u /* Informational */
|
||||
#define CTL2_BSR_SET 0x80000000u /* Informational */
|
||||
|
@ -511,7 +512,8 @@ different things in the two cases. */
|
|||
CTL_STARTCHAR|\
|
||||
CTL_UTF8_INPUT)
|
||||
|
||||
#define CTL2_ALLPD (CTL2_SUBSTITUTE_EXTENDED|\
|
||||
#define CTL2_ALLPD (CTL2_SUBSTITUTE_CALLOUT|\
|
||||
CTL2_SUBSTITUTE_EXTENDED|\
|
||||
CTL2_SUBSTITUTE_OVERFLOW_LENGTH|\
|
||||
CTL2_SUBSTITUTE_UNKNOWN_UNSET|\
|
||||
CTL2_SUBSTITUTE_UNSET_EMPTY|\
|
||||
|
@ -690,6 +692,7 @@ static modstruct modlist[] = {
|
|||
{ "startchar", MOD_PND, MOD_CTL, CTL_STARTCHAR, PO(control) },
|
||||
{ "startoffset", MOD_DAT, MOD_INT, 0, DO(offset) },
|
||||
{ "subject_literal", MOD_PATP, MOD_CTL, CTL2_SUBJECT_LITERAL, PO(control2) },
|
||||
{ "substitute_callout", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_CALLOUT, PO(control2) },
|
||||
{ "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) },
|
||||
{ "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) },
|
||||
{ "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) },
|
||||
|
@ -1355,6 +1358,17 @@ are supported. */
|
|||
else \
|
||||
pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_substitute_callout_8(G(a,8), \
|
||||
(void (*)(pcre2_substitute_callout_block_8 *, void *))b,c); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_substitute_callout_16(G(a,16), \
|
||||
(void (*)(pcre2_substitute_callout_block_16 *, void *))b,c); \
|
||||
else \
|
||||
pcre2_set_substitute_callout_32(G(a,32), \
|
||||
(void (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
|
||||
|
||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||
|
@ -1824,6 +1838,14 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_parens_nest_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_substitute_callout_,BITONE)(G(a,BITONE), \
|
||||
(void (*)(G(pcre2_substitute_callout_block_,BITONE) *, void *))b,c); \
|
||||
else \
|
||||
G(pcre2_set_substitute_callout_,BITTWO)(G(a,BITTWO), \
|
||||
(void (*)(G(pcre2_substitute_callout_block_,BITTWO) *, void *))b,c)
|
||||
|
||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
a = G(pcre2_substitute_,BITONE)(G(b,BITONE),(G(PCRE2_SPTR,BITONE))c,d,e,f, \
|
||||
|
@ -2025,6 +2047,9 @@ the three different cases. */
|
|||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||
pcre2_set_substitute_callout_8(G(a,8), \
|
||||
(void (*)(pcre2_substitute_callout_block_8 *, void *))b,c)
|
||||
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||
a = pcre2_substitute_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||
(PCRE2_SPTR8)i,j,(PCRE2_UCHAR8 *)k,l)
|
||||
|
@ -2129,6 +2154,9 @@ the three different cases. */
|
|||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
||||
/* 16-bit-only variant: no test_mode check. Calls
pcre2_set_substitute_callout_16() on the 16-bit flavour of context a, with
callout function b cast to the 16-bit callback type and user data c. No
trailing semicolon; the call site supplies it. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||
pcre2_set_substitute_callout_16(G(a,16), \
|
||||
(void (*)(pcre2_substitute_callout_block_16 *, void *))b,c)
|
||||
/* 16-bit-only variant: assigns the result of pcre2_substitute_16() to a.
b, g and h are base names that get the width suffix appended via G(); c and i
are cast to PCRE2_SPTR16 and k to PCRE2_UCHAR16 *; d, e, f, j and l are passed
through unchanged. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||
a = pcre2_substitute_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
||||
(PCRE2_SPTR16)i,j,(PCRE2_UCHAR16 *)k,l)
|
||||
|
@ -2221,7 +2249,7 @@ the three different cases. */
|
|||
#define PCRE2_SERIALIZE_GET_NUMBER_OF_CODES(r,a) \
|
||||
r = pcre2_serialize_get_number_of_codes_32(a)
|
||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
|
||||
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c)
|
||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||
|
@ -2233,6 +2261,9 @@ the three different cases. */
|
|||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||
/* 32-bit-only variant: no test_mode check. Calls
pcre2_set_substitute_callout_32() on the 32-bit flavour of context a, with
callout function b cast to the 32-bit callback type and user data c. No
trailing semicolon; the call site supplies it. */
#define PCRE2_SET_SUBSTITUTE_CALLOUT(a,b,c) \
|
||||
pcre2_set_substitute_callout_32(G(a,32), \
|
||||
(void (*)(pcre2_substitute_callout_block_32 *, void *))b,c)
|
||||
/* 32-bit-only variant: assigns the result of pcre2_substitute_32() to a.
b, g and h are base names that get the width suffix appended via G(); c and i
are cast to PCRE2_SPTR32 and k to PCRE2_UCHAR32 *; d, e, f, j and l are passed
through unchanged. */
#define PCRE2_SUBSTITUTE(a,b,c,d,e,f,g,h,i,j,k,l) \
|
||||
a = pcre2_substitute_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||
(PCRE2_SPTR32)i,j,(PCRE2_UCHAR32 *)k,l)
|
||||
|
@ -4022,7 +4053,7 @@ Returns: nothing
|
|||
static void
|
||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
|
@ -4058,6 +4089,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
|||
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
||||
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
|
||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_CALLOUT) != 0)? " substitute_callout" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "",
|
||||
|
@ -5896,6 +5928,35 @@ return capcount;
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Substitute callout function *
|
||||
*************************************************/
|
||||
|
||||
/* Called from pcre2_substitute() when the substitute_callout modifier is set.
|
||||
Print out the data that is passed back. The substitute callout block is
|
||||
identical for all code unit widths, so we just pick one.
|
||||
|
||||
Arguments:
|
||||
scb pointer to substitute callout block
|
||||
data_ptr callout data
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
/* Writes one line to outfile of the form "Old <i0> <i1> New <o0> <o1>", where
the numbers are the two entries of scb->input_offsets followed by the two
entries of scb->output_offsets. Nothing is returned and scb is not modified. */
static void
|
||||
substitute_callout_function(pcre2_substitute_callout_block_8 *scb,
|
||||
void *data_ptr)
|
||||
{
|
||||
(void)data_ptr; /* Not used */
|
||||
/* Judging by the expected test output, the "Old" pair is the start/end of the
match in the subject and the "New" pair is the start/end of its replacement
in the output buffer - NOTE(review): confirm against the pcre2_substitute()
documentation. SIZ_FORM / SIZ_CAST are presumably the matching printf
conversion and cast for these offsets. */
fprintf(outfile, "Old %" SIZ_FORM " %" SIZ_FORM " New %" SIZ_FORM
|
||||
" %" SIZ_FORM "\n",
|
||||
SIZ_CAST scb->input_offsets[0],
|
||||
SIZ_CAST scb->input_offsets[1],
|
||||
SIZ_CAST scb->output_offsets[0],
|
||||
SIZ_CAST scb->output_offsets[1]);
|
||||
}
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Callout function *
|
||||
*************************************************/
|
||||
|
@ -5907,8 +5968,11 @@ callout block for different code unit widths are that the pointers to the
|
|||
subject, the most recent MARK, and a callout argument string point to strings
|
||||
of the appropriate width. Casts can be used to deal with this.
|
||||
|
||||
Argument: a pointer to a callout block
|
||||
Return:
|
||||
Arguments:
|
||||
cb a pointer to a callout block
|
||||
callout_data_ptr the provided callout data
|
||||
|
||||
Returns: 0 or 1 or an error, as determined by settings
|
||||
*/
|
||||
|
||||
static int
|
||||
|
@ -6779,8 +6843,8 @@ if (pat_patctl.replacement[0] != 0)
|
|||
return PR_OK;
|
||||
}
|
||||
if ((dat_datctl.control & CTL_ALLCAPTURES) != 0)
|
||||
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
|
||||
}
|
||||
fprintf(outfile, "** Ignored with replacement text: allcaptures\n");
|
||||
}
|
||||
|
||||
/* Warn for modifiers that are ignored for DFA. */
|
||||
|
||||
|
@ -7158,6 +7222,16 @@ if (dat_datctl.replacement[0] != 0)
|
|||
rlen = PCRE2_ZERO_TERMINATED;
|
||||
else
|
||||
rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size;
|
||||
|
||||
if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0)
|
||||
{
|
||||
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, substitute_callout_function, NULL);
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_SET_SUBSTITUTE_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
||||
}
|
||||
|
||||
PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset,
|
||||
dat_datctl.options|xoptions, match_data, dat_context,
|
||||
rbuffer, rlen, nbuffer, &nsize);
|
||||
|
|
|
@ -475,5 +475,10 @@
|
|||
\x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
|
||||
# Offsets are different in 8-bit mode.
|
||||
|
||||
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||
123abcáyzabcdef789abcሴqr
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -381,5 +381,10 @@
|
|||
\x{100}
|
||||
\= Expect no match
|
||||
aaa
|
||||
|
||||
# Offsets are different in 8-bit mode.
|
||||
|
||||
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||
123abcáyzabcdef789abcሴqr
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -5514,4 +5514,7 @@ a)"xI
|
|||
abcdef\=ovector=4
|
||||
abxyz\=ovector=4
|
||||
|
||||
/a(b)c|xyz/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -1625,5 +1625,15 @@ Subject length lower bound = 1
|
|||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# Offsets are different in 8-bit mode.
|
||||
|
||||
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||
123abcáyzabcdef789abcሴqr
|
||||
Old 6 6 New 6 8
|
||||
Old 13 13 New 15 17
|
||||
Old 13 16 New 17 22
|
||||
Old 22 22 New 28 30
|
||||
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||
|
||||
# End of testinput10
|
||||
|
|
|
@ -1470,5 +1470,15 @@ Subject length lower bound = 1
|
|||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# Offsets are different in 8-bit mode.
|
||||
|
||||
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||
123abcáyzabcdef789abcሴqr
|
||||
Old 6 6 New 6 8
|
||||
Old 12 12 New 14 16
|
||||
Old 12 15 New 16 21
|
||||
Old 21 21 New 27 29
|
||||
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -1467,5 +1467,15 @@ Subject length lower bound = 1
|
|||
\= Expect no match
|
||||
aaa
|
||||
No match
|
||||
|
||||
# Offsets are different in 8-bit mode.
|
||||
|
||||
/(?<=abc)(|def)/g,utf,replace=<$0>,substitute_callout
|
||||
123abcáyzabcdef789abcሴqr
|
||||
Old 6 6 New 6 8
|
||||
Old 12 12 New 14 16
|
||||
Old 12 15 New 16 21
|
||||
Old 21 21 New 27 29
|
||||
4: 123abc<>\x{e1}yzabc<><def>789abc<>\x{1234}qr
|
||||
|
||||
# End of testinput12
|
||||
|
|
|
@ -16795,6 +16795,12 @@ Subject length lower bound = 1
|
|||
2: <unchanged>
|
||||
3: <unchanged>
|
||||
|
||||
/a(b)c|xyz/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
Old 0 3 New 0 5
|
||||
Old 6 9 New 8 13
|
||||
2: <abc>def<abc>pqr
|
||||
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
|
|
Loading…
Reference in New Issue