Implement pcre2_callout_enumerate().
This commit is contained in:
parent
b15698b077
commit
4e61019ffe
|
@ -10,7 +10,9 @@ Version 10.20 xx-xx-2015
|
||||||
|
|
||||||
3. The invalid pattern (?(?C) has a missing assertion condition at the end. The
|
3. The invalid pattern (?(?C) has a missing assertion condition at the end. The
|
||||||
pcre2_compile() function read past the end of the input before diagnosing an
|
pcre2_compile() function read past the end of the input before diagnosing an
|
||||||
error.
|
error. This bug was discovered by the LLVM fuzzer.
|
||||||
|
|
||||||
|
4. Implemented pcre2_callout_enumerate().
|
||||||
|
|
||||||
|
|
||||||
Version 10.10 06-March-2015
|
Version 10.10 06-March-2015
|
||||||
|
|
|
@ -24,6 +24,7 @@ dist_html_DATA = \
|
||||||
doc/html/index.html \
|
doc/html/index.html \
|
||||||
doc/html/pcre2-config.html \
|
doc/html/pcre2-config.html \
|
||||||
doc/html/pcre2.html \
|
doc/html/pcre2.html \
|
||||||
|
doc/html/pcre2_callout_enumerate.html \
|
||||||
doc/html/pcre2_code_free.html \
|
doc/html/pcre2_code_free.html \
|
||||||
doc/html/pcre2_compile.html \
|
doc/html/pcre2_compile.html \
|
||||||
doc/html/pcre2_compile_context_copy.html \
|
doc/html/pcre2_compile_context_copy.html \
|
||||||
|
@ -102,6 +103,7 @@ dist_html_DATA = \
|
||||||
dist_man_MANS = \
|
dist_man_MANS = \
|
||||||
doc/pcre2-config.1 \
|
doc/pcre2-config.1 \
|
||||||
doc/pcre2.3 \
|
doc/pcre2.3 \
|
||||||
|
doc/pcre2_callout_enumerate.3 \
|
||||||
doc/pcre2_code_free.3 \
|
doc/pcre2_code_free.3 \
|
||||||
doc/pcre2_compile.3 \
|
doc/pcre2_compile.3 \
|
||||||
doc/pcre2_compile_context_copy.3 \
|
doc/pcre2_compile_context_copy.3 \
|
||||||
|
|
|
@ -88,6 +88,9 @@ in the library.
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_callout_enumerate.html">pcre2_callout_enumerate</a></td>
|
||||||
|
<td> Enumerate callouts in a compiled pattern</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||||
<td> Free a compiled pattern</td></tr>
|
<td> Free a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,62 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre2_callout_enumerate specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre2_callout_enumerate man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
|
please consult the man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre2.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||||
|
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||||
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function scans a compiled regular expression and calls the <i>callback()</i>
|
||||||
|
function for each callout within the pattern. The yield of the function is zero
|
||||||
|
for success and non-zero otherwise. The arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>code</i> Points to the compiled pattern
|
||||||
|
<i>callback</i> The callback function
|
||||||
|
<i>callout_data</i> User data that is passed to the callback
|
||||||
|
</pre>
|
||||||
|
The <i>callback()</i> function is passed a pointer to a data block containing
|
||||||
|
the following fields:
|
||||||
|
<pre>
|
||||||
|
<i>version</i> Block version number
|
||||||
|
<i>pattern_position</i> Offset to next item in pattern
|
||||||
|
<i>next_item_length</i> Length of next item in pattern
|
||||||
|
<i>callout_number</i> Number for numbered callouts
|
||||||
|
<i>callout_string_offset</i> Offset to string within pattern
|
||||||
|
<i>callout_string_length</i> Length of callout string
|
||||||
|
<i>callout_string</i> Points to callout string or is NULL
|
||||||
|
</pre>
|
||||||
|
The second argument is the callout data that was passed to
|
||||||
|
<b>pcre2_callout_enumerate()</b>. The <b>callback()</b> function must return zero
|
||||||
|
for success. Any other value causes the pattern scan to stop, with the value
|
||||||
|
being passed back as the result of <b>pcre2_callout_enumerate()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
|
@ -35,23 +35,24 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC20" href="#SEC20">JUST-IN-TIME (JIT) COMPILATION</a>
|
<li><a name="TOC20" href="#SEC20">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||||
<li><a name="TOC21" href="#SEC21">LOCALE SUPPORT</a>
|
<li><a name="TOC21" href="#SEC21">LOCALE SUPPORT</a>
|
||||||
<li><a name="TOC22" href="#SEC22">INFORMATION ABOUT A COMPILED PATTERN</a>
|
<li><a name="TOC22" href="#SEC22">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||||
<li><a name="TOC23" href="#SEC23">SERIALIZATION AND PRECOMPILING</a>
|
<li><a name="TOC23" href="#SEC23">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
|
||||||
<li><a name="TOC24" href="#SEC24">THE MATCH DATA BLOCK</a>
|
<li><a name="TOC24" href="#SEC24">SERIALIZATION AND PRECOMPILING</a>
|
||||||
<li><a name="TOC25" href="#SEC25">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
<li><a name="TOC25" href="#SEC25">THE MATCH DATA BLOCK</a>
|
||||||
<li><a name="TOC26" href="#SEC26">NEWLINE HANDLING WHEN MATCHING</a>
|
<li><a name="TOC26" href="#SEC26">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||||
<li><a name="TOC27" href="#SEC27">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC27" href="#SEC27">NEWLINE HANDLING WHEN MATCHING</a>
|
||||||
<li><a name="TOC28" href="#SEC28">OTHER INFORMATION ABOUT A MATCH</a>
|
<li><a name="TOC28" href="#SEC28">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC29" href="#SEC29">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
<li><a name="TOC29" href="#SEC29">OTHER INFORMATION ABOUT A MATCH</a>
|
||||||
<li><a name="TOC30" href="#SEC30">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
<li><a name="TOC30" href="#SEC30">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||||
<li><a name="TOC31" href="#SEC31">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC31" href="#SEC31">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||||
<li><a name="TOC32" href="#SEC32">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
<li><a name="TOC32" href="#SEC32">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC33" href="#SEC33">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||||
<li><a name="TOC34" href="#SEC34">DUPLICATE SUBPATTERN NAMES</a>
|
<li><a name="TOC34" href="#SEC34">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||||
<li><a name="TOC35" href="#SEC35">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
<li><a name="TOC35" href="#SEC35">DUPLICATE SUBPATTERN NAMES</a>
|
||||||
<li><a name="TOC36" href="#SEC36">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
<li><a name="TOC36" href="#SEC36">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||||
<li><a name="TOC37" href="#SEC37">SEE ALSO</a>
|
<li><a name="TOC37" href="#SEC37">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||||
<li><a name="TOC38" href="#SEC38">AUTHOR</a>
|
<li><a name="TOC38" href="#SEC38">SEE ALSO</a>
|
||||||
<li><a name="TOC39" href="#SEC39">REVISION</a>
|
<li><a name="TOC39" href="#SEC39">AUTHOR</a>
|
||||||
|
<li><a name="TOC40" href="#SEC40">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<P>
|
<P>
|
||||||
<b>#include &lt;pcre2.h&gt;</b>
|
<b>#include &lt;pcre2.h&gt;</b>
|
||||||
|
@ -291,6 +292,11 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||||
|
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||||
|
<b> void *<i>user_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC11" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||||
|
@ -1433,14 +1439,16 @@ can be processed in different locales.
|
||||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_pattern_info()</b> function returns information about a compiled
|
The <b>pcre2_pattern_info()</b> function returns general information about a
|
||||||
pattern. The first argument is a pointer to the compiled pattern. The second
|
compiled pattern. For information about callouts, see the
|
||||||
argument specifies which piece of information is required, and the third
|
<a href="#infoaboutcallouts">next section.</a>
|
||||||
argument is a pointer to a variable to receive the data. If the third argument
|
The first argument for <b>pcre2_pattern_info()</b> is a pointer to the compiled
|
||||||
is NULL, the first argument is ignored, and the function returns the size in
|
pattern. The second argument specifies which piece of information is required,
|
||||||
bytes of the variable that is required for the information requested.
|
and the third argument is a pointer to a variable to receive the data. If the
|
||||||
Otherwise, The yield of the function is zero for success, or one of the
|
third argument is NULL, the first argument is ignored, and the function returns
|
||||||
following negative numbers:
|
the size in bytes of the variable that is required for the information
|
||||||
|
requested. Otherwise, the yield of the function is zero for success, or one of
|
||||||
|
the following negative numbers:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NULL the argument <i>code</i> was NULL
|
PCRE2_ERROR_NULL the argument <i>code</i> was NULL
|
||||||
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
||||||
|
@ -1719,8 +1727,27 @@ memory in which to place the compiled pattern may be slightly larger than the
|
||||||
value returned by this option, because there are cases where the code that
|
value returned by this option, because there are cases where the code that
|
||||||
calculates the size has to over-estimate. Processing a pattern with the JIT
|
calculates the size has to over-estimate. Processing a pattern with the JIT
|
||||||
compiler does not alter the value returned by this option.
|
compiler does not alter the value returned by this option.
|
||||||
|
<a name="infoaboutcallouts"></a></P>
|
||||||
|
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||||
|
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||||
|
<b> void *<i>user_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
A script language that supports the use of string arguments in callouts might
|
||||||
|
like to scan all the callouts in a pattern before running the match. This can
|
||||||
|
be done by calling <b>pcre2_callout_enumerate()</b>. The first argument is a
|
||||||
|
pointer to a compiled pattern, the second points to a callback function, and
|
||||||
|
the third is arbitrary user data. The callback function is called for every
|
||||||
|
callout in the pattern in the order in which they appear. Its first argument is
|
||||||
|
a pointer to a callout enumeration block, and its second argument is the
|
||||||
|
<i>user_data</i> value that was passed to <b>pcre2_callout_enumerate()</b>. The
|
||||||
|
contents of the callout enumeration block are described in the
|
||||||
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
|
documentation, which also gives further details about callouts.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
<br><a name="SEC24" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
||||||
<P>
|
<P>
|
||||||
It is possible to save compiled patterns on disc or elsewhere, and reload them
|
It is possible to save compiled patterns on disc or elsewhere, and reload them
|
||||||
later, subject to a number of restrictions. The functions whose names begin
|
later, subject to a number of restrictions. The functions whose names begin
|
||||||
|
@ -1729,7 +1756,7 @@ the
|
||||||
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
|
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
<a name="matchdatablock"></a></P>
|
<a name="matchdatablock"></a></P>
|
||||||
<br><a name="SEC24" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
<br><a name="SEC25" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
|
@ -1800,7 +1827,7 @@ match data block (for that match) have taken place.
|
||||||
When a match data block itself is no longer needed, it should be freed by
|
When a match data block itself is no longer needed, it should be freed by
|
||||||
calling <b>pcre2_match_data_free()</b>.
|
calling <b>pcre2_match_data_free()</b>.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
<br><a name="SEC26" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2014,7 +2041,7 @@ examples, in the
|
||||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
<br><a name="SEC27" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||||
standard convention for the operating system. The default can be overridden in
|
standard convention for the operating system. The default can be overridden in
|
||||||
|
@ -2049,7 +2076,7 @@ LF in the characters that it matches.
|
||||||
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
||||||
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||||
<a name="matchedstrings"></a></P>
|
<a name="matchedstrings"></a></P>
|
||||||
<br><a name="SEC27" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC28" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2151,7 +2178,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
|
||||||
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
||||||
had.
|
had.
|
||||||
<a name="matchotherdata"></a></P>
|
<a name="matchotherdata"></a></P>
|
||||||
<br><a name="SEC28" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
<br><a name="SEC29" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2195,7 +2222,7 @@ the code unit offset of the invalid UTF character. Details are given in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
<a name="errorlist"></a></P>
|
<a name="errorlist"></a></P>
|
||||||
<br><a name="SEC29" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
<br><a name="SEC30" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||||
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
||||||
|
@ -2246,8 +2273,8 @@ of the subject.
|
||||||
PCRE2_ERROR_CALLOUT
|
PCRE2_ERROR_CALLOUT
|
||||||
</pre>
|
</pre>
|
||||||
This error is never generated by <b>pcre2_match()</b> itself. It is provided for
|
This error is never generated by <b>pcre2_match()</b> itself. It is provided for
|
||||||
use by callout functions that want to cause <b>pcre2_match()</b> to return a
|
use by callout functions that want to cause <b>pcre2_match()</b> or
|
||||||
distinctive error code. See the
|
<b>pcre2_callout_enumerate()</b> to return a distinctive error code. See the
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
documentation for details.
|
documentation for details.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -2304,7 +2331,7 @@ is attempted.
|
||||||
</pre>
|
</pre>
|
||||||
The internal recursion limit was reached.
|
The internal recursion limit was reached.
|
||||||
<a name="extractbynumber"></a></P>
|
<a name="extractbynumber"></a></P>
|
||||||
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
<br><a name="SEC31" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||||
|
@ -2401,7 +2428,7 @@ The substring did not participate in the match. For example, if the pattern is
|
||||||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||||
capturing slots, substring number 1 is unset.
|
capturing slots, substring number 1 is unset.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC31" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC32" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||||
|
@ -2440,7 +2467,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
||||||
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
||||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||||
<a name="extractbyname"></a></P>
|
<a name="extractbyname"></a></P>
|
||||||
<br><a name="SEC32" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||||
|
@ -2500,7 +2527,7 @@ names are not included in the compiled code. The matching process uses only
|
||||||
numbers. For this reason, the use of different names for subpatterns of the
|
numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC33" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
<br><a name="SEC34" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2561,7 +2588,7 @@ straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
||||||
replacement string (unrecognized sequence following a dollar sign), and
|
replacement string (unrecognized sequence following a dollar sign), and
|
||||||
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC34" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC35" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||||
|
@ -2606,7 +2633,7 @@ The format of the name table is described above in the section entitled
|
||||||
Given all the relevant entries for the name, you can extract each of their
|
Given all the relevant entries for the name, you can extract each of their
|
||||||
numbers, and hence the captured data.
|
numbers, and hence the captured data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC35" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
<br><a name="SEC36" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||||
<P>
|
<P>
|
||||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||||
when it finds the first match at a given point in the subject. If you want to
|
when it finds the first match at a given point in the subject. If you want to
|
||||||
|
@ -2624,7 +2651,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
||||||
other alternatives. Ultimately, when it runs out of matches,
|
other alternatives. Ultimately, when it runs out of matches,
|
||||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||||
<a name="dfamatch"></a></P>
|
<a name="dfamatch"></a></P>
|
||||||
<br><a name="SEC36" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
<br><a name="SEC37" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2819,13 +2846,13 @@ some plausibility checks are made on the contents of the workspace, which
|
||||||
should contain data about the previous partial match. If any of these checks
|
should contain data about the previous partial match. If any of these checks
|
||||||
fail, this error is given.
|
fail, this error is given.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC37" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC38" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo(3)</b>,
|
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo(3)</b>,
|
||||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||||
<b>pcre2sample</b>(3), <b>pcre2stack</b>(3), <b>pcre2unicode</b>(3).
|
<b>pcre2sample</b>(3), <b>pcre2stack</b>(3), <b>pcre2unicode</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC38" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC39" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -2834,9 +2861,9 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC39" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 January 2015
|
Last updated: 23 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -17,9 +17,10 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
|
||||||
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
|
<li><a name="TOC3" href="#SEC3">MISSING CALLOUTS</a>
|
||||||
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
|
<li><a name="TOC4" href="#SEC4">THE CALLOUT INTERFACE</a>
|
||||||
<li><a name="TOC5" href="#SEC5">RETURN VALUES</a>
|
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM CALLOUTS</a>
|
||||||
<li><a name="TOC6" href="#SEC6">AUTHOR</a>
|
<li><a name="TOC6" href="#SEC6">CALLOUT ENUMERATION</a>
|
||||||
<li><a name="TOC7" href="#SEC7">REVISION</a>
|
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
|
||||||
|
<li><a name="TOC8" href="#SEC8">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -27,23 +28,32 @@ please consult the man page, in case the conversion went wrong.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int (*pcre2_callout)(pcre2_callout_block *, void *);</b>
|
<b>int (*pcre2_callout)(pcre2_callout_block *, void *);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||||
|
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||||
|
<b> void *<i>user_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 provides a feature called "callout", which is a means of temporarily
|
PCRE2 provides a feature called "callout", which is a means of temporarily
|
||||||
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
||||||
caller of PCRE2 provides an external function by putting its entry point in
|
caller of PCRE2 provides an external function by putting its entry point in
|
||||||
a match context (see <b>pcre2_set_callout()</b>) in the
|
a match context (see <b>pcre2_set_callout()</b> in the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation).
|
documentation).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Within a regular expression, (?C) indicates the points at which the external
|
Within a regular expression, (?C&lt;arg&gt;) indicates a point at which the external
|
||||||
function is to be called. Different callout points can be identified by putting
|
function is to be called. Different callout points can be identified by putting
|
||||||
a number less than 256 after the letter C. The default value is zero.
|
a number less than 256 after the letter C. The default value is zero.
|
||||||
For example, this pattern has two callout points:
|
Alternatively, the argument may be a delimited string. The starting delimiter
|
||||||
|
must be one of ` ' " ^ % # $ { and the ending delimiter is the same as the
|
||||||
|
start, except for {, where the ending delimiter is }. If the ending delimiter
|
||||||
|
is needed within the string, it must be doubled. For example, this pattern has
|
||||||
|
two callout points:
|
||||||
<pre>
|
<pre>
|
||||||
(?C1)abc(?C2)def
|
(?C1)abc(?C"some ""arbitrary"" text")def
|
||||||
</pre>
|
</pre>
|
||||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
||||||
automatically inserts callouts, all with number 255, before each item in the
|
automatically inserts callouts, all with number 255, before each item in the
|
||||||
|
@ -62,19 +72,18 @@ alternation bar. If the pattern contains a conditional group whose condition is
|
||||||
an assertion, an automatic callout is inserted immediately before the
|
an assertion, an automatic callout is inserted immediately before the
|
||||||
condition. Such a callout may also be inserted explicitly, for example:
|
condition. Such a callout may also be inserted explicitly, for example:
|
||||||
<pre>
|
<pre>
|
||||||
(?(?C9)(?=a)ab|de)
|
(?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de)
|
||||||
</pre>
|
</pre>
|
||||||
This applies only to assertion conditions (because they are themselves
|
This applies only to assertion conditions (because they are themselves
|
||||||
independent groups).
|
independent groups).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Automatic callouts can be used for tracking the progress of pattern matching.
|
Callouts can be useful for tracking the progress of pattern matching. The
|
||||||
The
|
|
||||||
<a href="pcre2test.html"><b>pcre2test</b></a>
|
<a href="pcre2test.html"><b>pcre2test</b></a>
|
||||||
program has a pattern qualifier (/auto_callout) that sets automatic callouts;
|
program has a pattern qualifier (/auto_callout) that sets automatic callouts.
|
||||||
when it is used, the output indicates how the pattern is being matched. This is
|
When any callouts are present, the output from <b>pcre2test</b> indicates how
|
||||||
useful information when you are trying to optimize the performance of a
|
the pattern is being matched. This is useful information when you are trying to
|
||||||
particular pattern.
|
optimize the performance of a particular pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
<br><a name="SEC3" href="#TOC1">MISSING CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -185,7 +194,7 @@ You can disable these optimizations by passing the PCRE2_NO_START_OPTIMIZE
|
||||||
option to <b>pcre2_compile()</b>, or by starting the pattern with
|
option to <b>pcre2_compile()</b>, or by starting the pattern with
|
||||||
(*NO_START_OPT). This slows down the matching process, but does ensure that
|
(*NO_START_OPT). This slows down the matching process, but does ensure that
|
||||||
callouts such as the example above are obeyed.
|
callouts such as the example above are obeyed.
|
||||||
</P>
|
<a name="calloutinterface"></a></P>
|
||||||
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
<br><a name="SEC4" href="#TOC1">THE CALLOUT INTERFACE</a><br>
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE2 reaches a callout point, if an external function is
|
During matching, when PCRE2 reaches a callout point, if an external function is
|
||||||
|
@ -209,16 +218,53 @@ documentation). The callout block structure contains the following fields:
|
||||||
PCRE2_SIZE <i>current_position</i>;
|
PCRE2_SIZE <i>current_position</i>;
|
||||||
PCRE2_SIZE <i>pattern_position</i>;
|
PCRE2_SIZE <i>pattern_position</i>;
|
||||||
PCRE2_SIZE <i>next_item_length</i>;
|
PCRE2_SIZE <i>next_item_length</i>;
|
||||||
|
PCRE2_SIZE <i>callout_string_offset</i>;
|
||||||
|
PCRE2_SIZE <i>callout_string_length</i>;
|
||||||
|
PCRE2_SPTR <i>callout_string</i>;
|
||||||
</pre>
|
</pre>
|
||||||
The <i>version</i> field contains the version number of the block format. The
|
The <i>version</i> field contains the version number of the block format. The
|
||||||
current version is 0. The version number will change in future if additional
|
current version is 1; the three callout string fields were added for this
|
||||||
fields are added, but the intention is never to remove any of the existing
|
version. If you are writing an application that might use an earlier release of
|
||||||
fields.
|
PCRE2, you should check the version number before accessing any of these
|
||||||
|
fields. The version number will increase in future if more fields are added,
|
||||||
|
but the intention is never to remove any of the existing fields.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Fields for numerical callouts
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
|
||||||
|
contains the number of the callout, in the range 0-255. This is the number
|
||||||
|
that follows (?C for manual callouts; it is 255 for automatically generated
|
||||||
|
callouts.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Fields for string callouts
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
For callouts with string arguments, <i>callout_number</i> is always zero, and
|
||||||
|
<i>callout_string</i> points to the string that is contained within the compiled
|
||||||
|
pattern. Its length is given by <i>callout_string_length</i>. Duplicated ending
|
||||||
|
delimiters that were present in the original pattern string have been turned
|
||||||
|
into single characters, but there is no other processing of the callout string
|
||||||
|
argument. An additional code unit containing binary zero is present after the
|
||||||
|
string, but is not included in the length. The delimiter that was used to start
|
||||||
|
the string is also stored within the pattern, immediately before the string
|
||||||
|
itself. You can access this delimiter as <i>callout_string</i>[-1] if you need
|
||||||
|
it.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>callout_number</i> field contains the number of the callout, as compiled
|
The <i>callout_string_offset</i> field is the code unit offset to the start of
|
||||||
into the pattern (that is, the number after ?C for manual callouts, and 255 for
|
the callout argument string within the original pattern string. This is
|
||||||
automatically generated callouts).
|
provided for the benefit of applications such as script languages that might
|
||||||
|
need to report errors in the callout string within the pattern.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Fields for all callouts
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The remaining fields in the callout block are the same for both kinds of
|
||||||
|
callout.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
|
The <i>offset_vector</i> field is a pointer to the vector of capturing offsets
|
||||||
|
@ -259,8 +305,8 @@ substrings have been captured, the value of <i>capture_last</i> is 0. This is
|
||||||
always the case for the DFA matching functions.
|
always the case for the DFA matching functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>pattern_position</i> field contains the offset to the next item to be
|
The <i>pattern_position</i> field contains the offset in the pattern string to
|
||||||
matched in the pattern string.
|
the next item to be matched.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>next_item_length</i> field contains the length of the next item to be
|
The <i>next_item_length</i> field contains the length of the next item to be
|
||||||
|
@ -272,7 +318,9 @@ of the entire subpattern.
|
||||||
<P>
|
<P>
|
||||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||||
help in distinguishing between different automatic callouts, which all have the
|
help in distinguishing between different automatic callouts, which all have the
|
||||||
same callout number. However, they are set for all callouts.
|
same callout number. However, they are set for all callouts, and are used by
|
||||||
|
<b>pcre2test</b> to show the next item to be matched when displaying callout
|
||||||
|
information.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
|
In callouts from <b>pcre2_match()</b> the <i>mark</i> field contains a pointer to
|
||||||
|
@ -281,7 +329,7 @@ the zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||||
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||||
callouts from the DFA matching function this field always contains NULL.
|
callouts from the DFA matching function this field always contains NULL.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">RETURN VALUES</a><br>
|
<br><a name="SEC5" href="#TOC1">RETURN VALUES FROM CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
The external callout function returns an integer to PCRE2. If the value is
|
The external callout function returns an integer to PCRE2. If the value is
|
||||||
zero, matching proceeds as normal. If the value is greater than zero, matching
|
zero, matching proceeds as normal. If the value is greater than zero, matching
|
||||||
|
@ -296,7 +344,51 @@ values. In particular, PCRE2_ERROR_NOMATCH forces a standard "no match"
|
||||||
failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
|
failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
|
||||||
functions; it will never be used by PCRE2 itself.
|
functions; it will never be used by PCRE2 itself.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC6" href="#TOC1">CALLOUT ENUMERATION</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||||
|
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||||
|
<b> void *<i>user_data</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
A script language that supports the use of string arguments in callouts might
|
||||||
|
like to scan all the callouts in a pattern before running the match. This can
|
||||||
|
be done by calling <b>pcre2_callout_enumerate()</b>. The first argument is a
|
||||||
|
pointer to a compiled pattern, the second points to a callback function, and
|
||||||
|
the third is arbitrary user data. The callback function is called for every
|
||||||
|
callout in the pattern in the order in which they appear. Its first argument is
|
||||||
|
a pointer to a callout enumeration block, and its second argument is the
|
||||||
|
<i>user_data</i> value that was passed to <b>pcre2_callout_enumerate()</b>. The
|
||||||
|
data block contains the following fields:
|
||||||
|
<pre>
|
||||||
|
<i>version</i> Block version number
|
||||||
|
<i>pattern_position</i> Offset to next item in pattern
|
||||||
|
<i>next_item_length</i> Length of next item in pattern
|
||||||
|
<i>callout_number</i> Number for numbered callouts
|
||||||
|
<i>callout_string_offset</i> Offset to string within pattern
|
||||||
|
<i>callout_string_length</i> Length of callout string
|
||||||
|
<i>callout_string</i> Points to callout string or is NULL
|
||||||
|
</pre>
|
||||||
|
The version number is currently 0. It will increase if new fields are ever
|
||||||
|
added to the block. The remaining fields are the same as their namesakes in the
|
||||||
|
<b>pcre2_callout</b> block that is used for callouts during matching, as
|
||||||
|
described
|
||||||
|
<a href="#calloutinterface">above.</a>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that the value of <i>pattern_position</i> is unique for each callout.
|
||||||
|
However, if a callout occurs inside a group that is quantified with a non-zero
|
||||||
|
minimum or a fixed maximum, the group is replicated inside the compiled
|
||||||
|
pattern. For example, a pattern such as /(a){2}/ is compiled as if it were
|
||||||
|
/(a)(a)/. This means that the callout will be enumerated more than once, but
|
||||||
|
with the same value for <i>pattern_position</i> in each case.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The callback function should normally return zero. If it returns a non-zero
|
||||||
|
value, scanning the pattern stops, and that value is returned from
|
||||||
|
<b>pcre2_callout_enumerate()</b>.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -305,9 +397,9 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 02 January 2015
|
Last updated: 23 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -83,11 +83,11 @@ the
|
||||||
documentation for details.
|
documentation for details.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
8. Subpatterns that are called as subroutines (whether or not recursively) are
|
8. Subroutine calls (whether recursive or not) are treated as atomic groups.
|
||||||
always treated as atomic groups in PCRE2. This is like Python, but unlike Perl.
|
Atomic recursion is like Python, but unlike Perl. Captured values that are set
|
||||||
Captured values that are set outside a subroutine call can be reference from
|
outside a subroutine call can be referenced from inside in PCRE2, but not in
|
||||||
inside in PCRE2, but not in Perl. There is a discussion that explains these
|
Perl. There is a discussion that explains these differences in more detail in
|
||||||
differences in more detail in the
|
the
|
||||||
<a href="pcre2pattern.html#recursiondifference">section on recursion differences from Perl</a>
|
<a href="pcre2pattern.html#recursiondifference">section on recursion differences from Perl</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
|
@ -214,9 +214,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 28 September 2014
|
Last updated: 15 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -2786,43 +2786,70 @@ same pair of parentheses when there is a repetition.
|
||||||
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
||||||
code. The feature is called "callout". The caller of PCRE2 provides an external
|
code. The feature is called "callout". The caller of PCRE2 provides an external
|
||||||
function by putting its entry point in a match context using the function
|
function by putting its entry point in a match context using the function
|
||||||
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
|
<b>pcre2_set_callout()</b>, and then passing that context to <b>pcre2_match()</b>
|
||||||
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
|
or <b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout
|
||||||
point is set to NULL, callouts are disabled.
|
entry point is set to NULL, callouts are disabled.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Within a regular expression, (?C) indicates the points at which the external
|
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||||
function is to be called. If you want to identify different callout points, you
|
function is to be called. There are two kinds of callout: those with a
|
||||||
can put a number less than 256 after the letter C. The default value is zero.
|
numerical argument and those with a string argument. (?C) on its own with no
|
||||||
For example, this pattern has two callout points:
|
argument is treated as (?C0). A numerical argument allows the application to
|
||||||
|
distinguish between different callouts. String arguments were added for release
|
||||||
|
10.20 to make it possible for script languages that use PCRE2 to embed short
|
||||||
|
scripts within patterns in a similar way to Perl.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
During matching, when PCRE2 reaches a callout point, the external function is
|
||||||
|
called. It is provided with the number or string argument of the callout, the
|
||||||
|
position in the pattern, and one item of data that is also set in the match
|
||||||
|
block. The callout function may cause matching to proceed, to backtrack, or to
|
||||||
|
fail.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
By default, PCRE2 implements a number of optimizations at matching time, and
|
||||||
|
one side-effect is that sometimes callouts are skipped. If you need all
|
||||||
|
possible callouts to happen, you need to set options that disable the relevant
|
||||||
|
optimizations. More details, including a complete description of the
|
||||||
|
programming interface to the callout function, are given in the
|
||||||
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Callouts with numerical arguments
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
If you just want to have a means of identifying different callout points, put a
|
||||||
|
number less than 256 after the letter C. For example, this pattern has two
|
||||||
|
callout points:
|
||||||
<pre>
|
<pre>
|
||||||
(?C1)abc(?C2)def
|
(?C1)abc(?C2)def
|
||||||
</pre>
|
</pre>
|
||||||
If the PCRE2_AUTO_CALLOUT flag is passed to <b>pcre2_compile()</b>, callouts are
|
If the PCRE2_AUTO_CALLOUT flag is passed to <b>pcre2_compile()</b>, numerical
|
||||||
automatically installed before each item in the pattern. They are all numbered
|
callouts are automatically installed before each item in the pattern. They are
|
||||||
255. If there is a conditional group in the pattern whose condition is an
|
all numbered 255. If there is a conditional group in the pattern whose
|
||||||
assertion, an additional callout is inserted just before the condition. An
|
condition is an assertion, an additional callout is inserted just before the
|
||||||
explicit callout may also be set at this position, as in this example:
|
condition. An explicit callout may also be set at this position, as in this
|
||||||
|
example:
|
||||||
<pre>
|
<pre>
|
||||||
(?(?C9)(?=a)abc|def)
|
(?(?C9)(?=a)abc|def)
|
||||||
</pre>
|
</pre>
|
||||||
Note that this applies only to assertion conditions, not to other types of
|
Note that this applies only to assertion conditions, not to other types of
|
||||||
condition.
|
condition.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Callouts with string arguments
|
||||||
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE2 reaches a callout point, the external function is
|
A delimited string may be used instead of a number as a callout argument. The
|
||||||
called. It is provided with the number of the callout, the position in the
|
starting delimiter must be one of ` ' " ^ % # $ { and the ending delimiter is
|
||||||
pattern, and one item of data that is also set in the match block. The callout
|
the same as the start, except for {, where the ending delimiter is }. If the
|
||||||
function may cause matching to proceed, to backtrack, or to fail.
|
ending delimiter is needed within the string, it must be doubled. For
|
||||||
</P>
|
example:
|
||||||
<P>
|
<pre>
|
||||||
By default, PCRE2 implements a number of optimizations at matching time, and
|
(?C'ab ''c'' d')xyz(?C{any text})pqr
|
||||||
one side-effect is that sometimes callouts are skipped. If you need all
|
</pre>
|
||||||
possible callouts to happen, you need to set options that disable the relevant
|
The doubling is removed before the string is passed to the callout function.
|
||||||
optimizations. More details, and a complete description of the interface to the
|
|
||||||
callout function, are given in the
|
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
|
||||||
documentation.
|
|
||||||
<a name="backtrackcontrol"></a></P>
|
<a name="backtrackcontrol"></a></P>
|
||||||
<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
<br><a name="SEC27" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -3258,7 +3285,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 28 January 2015
|
Last updated: 15 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -535,9 +535,13 @@ pattern is not anchored.
|
||||||
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<pre>
|
<pre>
|
||||||
(?C) callout
|
(?C) callout (assumed number 0)
|
||||||
(?Cn) callout with data n
|
(?Cn) callout with numerical data n
|
||||||
</PRE>
|
(?C"text") callout with string data
|
||||||
|
</pre>
|
||||||
|
The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
|
||||||
|
start and the end), and the starting delimiter { matched with the ending
|
||||||
|
delimiter }. To encode the ending delimiter within the string, double it.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC25" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -555,7 +559,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 26 January 2015
|
Last updated: 15 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -90,11 +90,18 @@ names used in the libraries have a suffix _8, _16, or _32, as appropriate.
|
||||||
<P>
|
<P>
|
||||||
Input to <b>pcre2test</b> is processed line by line, either by calling the C
|
Input to <b>pcre2test</b> is processed line by line, either by calling the C
|
||||||
library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
|
library's <b>fgets()</b> function, or via the <b>libreadline</b> library (see
|
||||||
below). In Unix-like environments, <b>fgets()</b> treats any bytes other than
|
below). The input is processed using C's string functions, so must not
|
||||||
newline as data characters. However, in some Windows environments character 26
|
contain binary zeroes, even though in Unix-like environments, <b>fgets()</b>
|
||||||
(hex 1A) causes an immediate end of file, and no further data is read. For
|
treats any bytes other than newline as data characters. In some Windows
|
||||||
maximum portability, therefore, it is safest to avoid non-printing characters
|
environments character 26 (hex 1A) causes an immediate end of file, and no
|
||||||
in <b>pcre2test</b> input files.
|
further data is read.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
For maximum portability, therefore, it is safest to avoid non-printing
|
||||||
|
characters in <b>pcre2test</b> input files. There is a facility for specifying a
|
||||||
|
pattern's characters as hexadecimal pairs, thus making it possible to include
|
||||||
|
binary zeroes in a pattern for testing purposes. Subject lines are processed
|
||||||
|
for backslash escapes, which makes it possible to include any data value.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
<br><a name="SEC4" href="#TOC1">COMMAND LINE OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -499,6 +506,7 @@ about the pattern:
|
||||||
<pre>
|
<pre>
|
||||||
bsr=[anycrlf|unicode] specify \R handling
|
bsr=[anycrlf|unicode] specify \R handling
|
||||||
/B bincode show binary code without lengths
|
/B bincode show binary code without lengths
|
||||||
|
callout_info show callout information
|
||||||
debug same as info,fullbincode
|
debug same as info,fullbincode
|
||||||
fullbincode show binary code with lengths
|
fullbincode show binary code with lengths
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
|
@ -580,6 +588,12 @@ unit" is the last literal code unit that must be present in any match. This is
|
||||||
not necessarily the last character. These lines are omitted if no starting or
|
not necessarily the last character. These lines are omitted if no starting or
|
||||||
ending code units are recorded.
|
ending code units are recorded.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
The <b>callout_info</b> modifier requests information about all the callouts in
|
||||||
|
the pattern. A list of them is output at the end of any other information that
|
||||||
|
is requested. For each callout, either its number or string is given, followed
|
||||||
|
by the item that follows it in the pattern.
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Specifying a pattern in hex
|
Specifying a pattern in hex
|
||||||
</b><br>
|
</b><br>
|
||||||
|
@ -907,12 +921,15 @@ set, the current captured groups are output when a callout occurs.
|
||||||
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
only one number, 1 is returned instead of 0 when a callout of that number is
|
||||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
||||||
for the <m>th time.
|
for the <m>th time. Note that callouts with string arguments are always given
|
||||||
|
the number zero. See "Callouts" below for a description of the output when a
|
||||||
|
callout is taken.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
||||||
Any value other than zero is used as a return from <b>pcre2test</b>'s callout
|
This is set as the "user data" that is passed to the matching function, and
|
||||||
function.
|
passed back when the callout function is invoked. Any value other than zero is
|
||||||
|
used as a return from <b>pcre2test</b>'s callout function.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
@ -1262,10 +1279,32 @@ documentation.
|
||||||
<br><a name="SEC16" href="#TOC1">CALLOUTS</a><br>
|
<br><a name="SEC16" href="#TOC1">CALLOUTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
If the pattern contains any callout requests, <b>pcre2test</b>'s callout
|
If the pattern contains any callout requests, <b>pcre2test</b>'s callout
|
||||||
function is called during matching. This works with both matching functions. By
|
function is called during matching unless <b>callout_none</b> is specified.
|
||||||
default, the called function displays the callout number, the start and current
|
This works with both matching functions.
|
||||||
positions in the text at the callout time, and the next pattern item to be
|
</P>
|
||||||
tested. For example:
|
<P>
|
||||||
|
The callout function in <b>pcre2test</b> returns zero (carry on matching) by
|
||||||
|
default, but you can use a <b>callout_fail</b> modifier in a subject line (as
|
||||||
|
described above) to change this and other parameters of the callout.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Inserting callouts can be helpful when using <b>pcre2test</b> to check
|
||||||
|
complicated regular expressions. For further information about callouts, see
|
||||||
|
the
|
||||||
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The output for callouts with numerical arguments and those with string
|
||||||
|
arguments is slightly different.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Callouts with numerical arguments
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
By default, the callout function displays the callout number, the start and
|
||||||
|
current positions in the subject text at the callout time, and the next pattern
|
||||||
|
item to be tested. For example:
|
||||||
<pre>
|
<pre>
|
||||||
--->pqrabcdef
|
--->pqrabcdef
|
||||||
0 ^ ^ \d
|
0 ^ ^ \d
|
||||||
|
@ -1308,17 +1347,27 @@ The mark changes between matching "a" and "b", but stays the same for the rest
|
||||||
of the match, so nothing more is output. If, as a result of backtracking, the
|
of the match, so nothing more is output. If, as a result of backtracking, the
|
||||||
mark reverts to being unset, the text "<unset>" is output.
|
mark reverts to being unset, the text "<unset>" is output.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Callouts with string arguments
|
||||||
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The callout function in <b>pcre2test</b> returns zero (carry on matching) by
|
The output for a callout with a string argument is similar, except that instead
|
||||||
default, but you can use a <b>callout_fail</b> modifier in a subject line (as
|
of outputting a callout number before the position indicators, the callout
|
||||||
described above) to change this and other parameters of the callout.
|
string and its offset in the pattern string are output before the reflection of
|
||||||
</P>
|
the subject string, and the subject string is reflected for each callout. For
|
||||||
<P>
|
example:
|
||||||
Inserting callouts can be helpful when using <b>pcre2test</b> to check
|
<pre>
|
||||||
complicated regular expressions. For further information about callouts, see
|
re> /^ab(?C'first')cd(?C"second")ef/
|
||||||
the
|
data> abcdefg
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
Callout (7): 'first'
|
||||||
documentation.
|
--->abcdefg
|
||||||
|
^ ^ c
|
||||||
|
Callout (20): "second"
|
||||||
|
--->abcdefg
|
||||||
|
^ ^ e
|
||||||
|
0: abcdef
|
||||||
|
|
||||||
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
<br><a name="SEC17" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1411,7 +1460,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 January 2015
|
Last updated: 22 March 2015
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2015 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -88,6 +88,9 @@ in the library.
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_callout_enumerate.html">pcre2_callout_enumerate</a></td>
|
||||||
|
<td> Enumerate callouts in a compiled pattern</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||||
<td> Free a compiled pattern</td></tr>
|
<td> Free a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
|
194
doc/pcre2.txt
194
doc/pcre2.txt
|
@ -367,6 +367,10 @@ PCRE2 NATIVE API AUXILIARY FUNCTIONS
|
||||||
|
|
||||||
int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where);
|
int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where);
|
||||||
|
|
||||||
|
int pcre2_callout_enumerate(const pcre2_code *code,
|
||||||
|
int (*callback)(pcre2_callout_enumerate_block *, void *),
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
int pcre2_config(uint32_t what, void *where);
|
int pcre2_config(uint32_t what, void *where);
|
||||||
|
|
||||||
|
|
||||||
|
@ -1452,14 +1456,16 @@ INFORMATION ABOUT A COMPILED PATTERN
|
||||||
|
|
||||||
int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where);
|
int pcre2_pattern_info(const pcre2 *code, uint32_t what, void *where);
|
||||||
|
|
||||||
The pcre2_pattern_info() function returns information about a compiled
|
The pcre2_pattern_info() function returns general information about a
|
||||||
pattern. The first argument is a pointer to the compiled pattern. The
|
compiled pattern. For information about callouts, see the next section.
|
||||||
second argument specifies which piece of information is required, and
|
The first argument for pcre2_pattern_info() is a pointer to the com-
|
||||||
the third argument is a pointer to a variable to receive the data. If
|
piled pattern. The second argument specifies which piece of information
|
||||||
the third argument is NULL, the first argument is ignored, and the
|
is required, and the third argument is a pointer to a variable to
|
||||||
function returns the size in bytes of the variable that is required for
|
receive the data. If the third argument is NULL, the first argument is
|
||||||
the information requested. Otherwise, The yield of the function is
|
ignored, and the function returns the size in bytes of the variable
|
||||||
zero for success, or one of the following negative numbers:
|
that is required for the information requested. Otherwise, the yield of
|
||||||
|
the function is zero for success, or one of the following negative num-
|
||||||
|
bers:
|
||||||
|
|
||||||
PCRE2_ERROR_NULL the argument code was NULL
|
PCRE2_ERROR_NULL the argument code was NULL
|
||||||
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
||||||
|
@ -1744,6 +1750,25 @@ INFORMATION ABOUT A COMPILED PATTERN
|
||||||
alter the value returned by this option.
|
alter the value returned by this option.
|
||||||
|
|
||||||
|
|
||||||
|
INFORMATION ABOUT A PATTERN'S CALLOUTS
|
||||||
|
|
||||||
|
int pcre2_callout_enumerate(const pcre2_code *code,
|
||||||
|
int (*callback)(pcre2_callout_enumerate_block *, void *),
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
|
A script language that supports the use of string arguments in callouts
|
||||||
|
might like to scan all the callouts in a pattern before running the
|
||||||
|
match. This can be done by calling pcre2_callout_enumerate(). The first
|
||||||
|
argument is a pointer to a compiled pattern, the second points to a
|
||||||
|
callback function, and the third is arbitrary user data. The callback
|
||||||
|
function is called for every callout in the pattern in the order in
|
||||||
|
which they appear. Its first argument is a pointer to a callout enumer-
|
||||||
|
ation block, and its second argument is the user_data value that was
|
||||||
|
passed to pcre2_callout_enumerate(). The contents of the callout enu-
|
||||||
|
meration block are described in the pcre2callout documentation, which
|
||||||
|
also gives further details about callouts.
|
||||||
|
|
||||||
|
|
||||||
SERIALIZATION AND PRECOMPILING
|
SERIALIZATION AND PRECOMPILING
|
||||||
|
|
||||||
It is possible to save compiled patterns on disc or elsewhere, and
|
It is possible to save compiled patterns on disc or elsewhere, and
|
||||||
|
@ -2221,9 +2246,9 @@ ERROR RETURNS FROM pcre2_match()
|
||||||
PCRE2_ERROR_CALLOUT
|
PCRE2_ERROR_CALLOUT
|
||||||
|
|
||||||
This error is never generated by pcre2_match() itself. It is provided
|
This error is never generated by pcre2_match() itself. It is provided
|
||||||
for use by callout functions that want to cause pcre2_match() to return
|
for use by callout functions that want to cause pcre2_match() or
|
||||||
a distinctive error code. See the pcre2callout documentation for
|
pcre2_callout_enumerate() to return a distinctive error code. See the
|
||||||
details.
|
pcre2callout documentation for details.
|
||||||
|
|
||||||
PCRE2_ERROR_INTERNAL
|
PCRE2_ERROR_INTERNAL
|
||||||
|
|
||||||
|
@ -2771,7 +2796,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 23 January 2015
|
Last updated: 23 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3250,22 +3275,30 @@ SYNOPSIS
|
||||||
|
|
||||||
int (*pcre2_callout)(pcre2_callout_block *, void *);
|
int (*pcre2_callout)(pcre2_callout_block *, void *);
|
||||||
|
|
||||||
|
int pcre2_callout_enumerate(const pcre2_code *code,
|
||||||
|
int (*callback)(pcre2_callout_enumerate_block *, void *),
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
||||||
PCRE2 provides a feature called "callout", which is a means of tempo-
|
PCRE2 provides a feature called "callout", which is a means of tempo-
|
||||||
rarily passing control to the caller of PCRE2 in the middle of pattern
|
rarily passing control to the caller of PCRE2 in the middle of pattern
|
||||||
matching. The caller of PCRE2 provides an external function by putting
|
matching. The caller of PCRE2 provides an external function by putting
|
||||||
its entry point in a match context (see pcre2_set_callout()) in the
|
its entry point in a match context (see pcre2_set_callout() in the
|
||||||
pcre2api documentation).
|
pcre2api documentation).
|
||||||
|
|
||||||
Within a regular expression, (?C) indicates the points at which the
|
Within a regular expression, (?C<arg>) indicates a point at which the
|
||||||
external function is to be called. Different callout points can be
|
external function is to be called. Different callout points can be
|
||||||
identified by putting a number less than 256 after the letter C. The
|
identified by putting a number less than 256 after the letter C. The
|
||||||
default value is zero. For example, this pattern has two callout
|
default value is zero. Alternatively, the argument may be a delimited
|
||||||
|
string. The starting delimiter must be one of ` ' " ^ % # $ { and the
|
||||||
|
ending delimiter is the same as the start, except for {, where the end-
|
||||||
|
ing delimiter is }. If the ending delimiter is needed within the
|
||||||
|
string, it must be doubled. For example, this pattern has two callout
|
||||||
points:
|
points:
|
||||||
|
|
||||||
(?C1)abc(?C2)def
|
(?C1)abc(?C"some ""arbitrary"" text")def
|
||||||
|
|
||||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled,
|
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled,
|
||||||
PCRE2 automatically inserts callouts, all with number 255, before each
|
PCRE2 automatically inserts callouts, all with number 255, before each
|
||||||
|
@ -3284,16 +3317,17 @@ DESCRIPTION
|
||||||
before the condition. Such a callout may also be inserted explicitly,
|
before the condition. Such a callout may also be inserted explicitly,
|
||||||
for example:
|
for example:
|
||||||
|
|
||||||
(?(?C9)(?=a)ab|de)
|
(?(?C9)(?=a)ab|de) (?(?C%text%)(?!=d)ab|de)
|
||||||
|
|
||||||
This applies only to assertion conditions (because they are themselves
|
This applies only to assertion conditions (because they are themselves
|
||||||
independent groups).
|
independent groups).
|
||||||
|
|
||||||
Automatic callouts can be used for tracking the progress of pattern
|
Callouts can be useful for tracking the progress of pattern matching.
|
||||||
matching. The pcre2test program has a pattern qualifier (/auto_call-
|
The pcre2test program has a pattern qualifier (/auto_callout) that sets
|
||||||
out) that sets automatic callouts; when it is used, the output indi-
|
automatic callouts. When any callouts are present, the output from
|
||||||
cates how the pattern is being matched. This is useful information when
|
pcre2test indicates how the pattern is being matched. This is useful
|
||||||
you are trying to optimize the performance of a particular pattern.
|
information when you are trying to optimize the performance of a par-
|
||||||
|
ticular pattern.
|
||||||
|
|
||||||
|
|
||||||
MISSING CALLOUTS
|
MISSING CALLOUTS
|
||||||
|
@ -3422,15 +3456,47 @@ THE CALLOUT INTERFACE
|
||||||
PCRE2_SIZE current_position;
|
PCRE2_SIZE current_position;
|
||||||
PCRE2_SIZE pattern_position;
|
PCRE2_SIZE pattern_position;
|
||||||
PCRE2_SIZE next_item_length;
|
PCRE2_SIZE next_item_length;
|
||||||
|
PCRE2_SIZE callout_string_offset;
|
||||||
|
PCRE2_SIZE callout_string_length;
|
||||||
|
PCRE2_SPTR callout_string;
|
||||||
|
|
||||||
The version field contains the version number of the block format. The
|
The version field contains the version number of the block format. The
|
||||||
current version is 0. The version number will change in future if addi-
|
current version is 1; the three callout string fields were added for
|
||||||
tional fields are added, but the intention is never to remove any of
|
this version. If you are writing an application that might use an ear-
|
||||||
the existing fields.
|
lier release of PCRE2, you should check the version number before
|
||||||
|
accessing any of these fields. The version number will increase in
|
||||||
|
future if more fields are added, but the intention is never to remove
|
||||||
|
any of the existing fields.
|
||||||
|
|
||||||
The callout_number field contains the number of the callout, as com-
|
Fields for numerical callouts
|
||||||
piled into the pattern (that is, the number after ?C for manual call-
|
|
||||||
outs, and 255 for automatically generated callouts).
|
For a numerical callout, callout_string is NULL, and callout_number
|
||||||
|
contains the number of the callout, in the range 0-255. This is the
|
||||||
|
number that follows (?C for manual callouts; it is 255 for automati-
|
||||||
|
cally generated callouts.
|
||||||
|
|
||||||
|
Fields for string callouts
|
||||||
|
|
||||||
|
For callouts with string arguments, callout_number is always zero, and
|
||||||
|
callout_string points to the string that is contained within the com-
|
||||||
|
piled pattern. Its length is given by callout_string_length. Duplicated
|
||||||
|
ending delimiters that were present in the original pattern string have
|
||||||
|
been turned into single characters, but there is no other processing of
|
||||||
|
the callout string argument. An additional code unit containing binary
|
||||||
|
zero is present after the string, but is not included in the length.
|
||||||
|
The delimiter that was used to start the string is also stored within
|
||||||
|
the pattern, immediately before the string itself. You can access this
|
||||||
|
delimiter as callout_string[-1] if you need it.
|
||||||
|
|
||||||
|
The callout_string_offset field is the code unit offset to the start of
|
||||||
|
the callout argument string within the original pattern string. This is
|
||||||
|
provided for the benefit of applications such as script languages that
|
||||||
|
might need to report errors in the callout string within the pattern.
|
||||||
|
|
||||||
|
Fields for all callouts
|
||||||
|
|
||||||
|
The remaining fields in the callout block are the same for both kinds
|
||||||
|
of callout.
|
||||||
|
|
||||||
The offset_vector field is a pointer to the vector of capturing offsets
|
The offset_vector field is a pointer to the vector of capturing offsets
|
||||||
(the "ovector") that was passed to the matching function in the match
|
(the "ovector") that was passed to the matching function in the match
|
||||||
|
@ -3464,8 +3530,8 @@ THE CALLOUT INTERFACE
|
||||||
substrings. If no substrings have been captured, the value of cap-
|
substrings. If no substrings have been captured, the value of cap-
|
||||||
ture_last is 0. This is always the case for the DFA matching functions.
|
ture_last is 0. This is always the case for the DFA matching functions.
|
||||||
|
|
||||||
The pattern_position field contains the offset to the next item to be
|
The pattern_position field contains the offset in the pattern string to
|
||||||
matched in the pattern string.
|
the next item to be matched.
|
||||||
|
|
||||||
The next_item_length field contains the length of the next item to be
|
The next_item_length field contains the length of the next item to be
|
||||||
matched in the pattern string. When the callout immediately precedes an
|
matched in the pattern string. When the callout immediately precedes an
|
||||||
|
@ -3475,7 +3541,9 @@ THE CALLOUT INTERFACE
|
||||||
|
|
||||||
The pattern_position and next_item_length fields are intended to help
|
The pattern_position and next_item_length fields are intended to help
|
||||||
in distinguishing between different automatic callouts, which all have
|
in distinguishing between different automatic callouts, which all have
|
||||||
the same callout number. However, they are set for all callouts.
|
the same callout number. However, they are set for all callouts, and
|
||||||
|
are used by pcre2test to show the next item to be matched when display-
|
||||||
|
ing callout information.
|
||||||
|
|
||||||
In callouts from pcre2_match() the mark field contains a pointer to the
|
In callouts from pcre2_match() the mark field contains a pointer to the
|
||||||
zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||||
|
@ -3485,7 +3553,7 @@ THE CALLOUT INTERFACE
|
||||||
always contains NULL.
|
always contains NULL.
|
||||||
|
|
||||||
|
|
||||||
RETURN VALUES
|
RETURN VALUES FROM CALLOUTS
|
||||||
|
|
||||||
The external callout function returns an integer to PCRE2. If the value
|
The external callout function returns an integer to PCRE2. If the value
|
||||||
is zero, matching proceeds as normal. If the value is greater than
|
is zero, matching proceeds as normal. If the value is greater than
|
||||||
|
@ -3501,6 +3569,49 @@ RETURN VALUES
|
||||||
itself.
|
itself.
|
||||||
|
|
||||||
|
|
||||||
|
CALLOUT ENUMERATION
|
||||||
|
|
||||||
|
int pcre2_callout_enumerate(const pcre2_code *code,
|
||||||
|
int (*callback)(pcre2_callout_enumerate_block *, void *),
|
||||||
|
void *user_data);
|
||||||
|
|
||||||
|
A script language that supports the use of string arguments in callouts
|
||||||
|
might like to scan all the callouts in a pattern before running the
|
||||||
|
match. This can be done by calling pcre2_callout_enumerate(). The first
|
||||||
|
argument is a pointer to a compiled pattern, the second points to a
|
||||||
|
callback function, and the third is arbitrary user data. The callback
|
||||||
|
function is called for every callout in the pattern in the order in
|
||||||
|
which they appear. Its first argument is a pointer to a callout enumer-
|
||||||
|
ation block, and its second argument is the user_data value that was
|
||||||
|
passed to pcre2_callout_enumerate(). The data block contains the fol-
|
||||||
|
lowing fields:
|
||||||
|
|
||||||
|
version Block version number
|
||||||
|
pattern_position Offset to next item in pattern
|
||||||
|
next_item_length Length of next item in pattern
|
||||||
|
callout_number Number for numbered callouts
|
||||||
|
callout_string_offset Offset to string within pattern
|
||||||
|
callout_string_length Length of callout string
|
||||||
|
callout_string Points to callout string or is NULL
|
||||||
|
|
||||||
|
The version number is currently 0. It will increase if new fields are
|
||||||
|
ever added to the block. The remaining fields are the same as their
|
||||||
|
namesakes in the pcre2_callout block that is used for callouts during
|
||||||
|
matching, as described above.
|
||||||
|
|
||||||
|
Note that the value of pattern_position is unique for each callout.
|
||||||
|
However, if a callout occurs inside a group that is quantified with a
|
||||||
|
non-zero minimum or a fixed maximum, the group is replicated inside the
|
||||||
|
compiled pattern. For example, a pattern such as /(a){2}/ is compiled
|
||||||
|
as if it were /(a)(a)/. This means that the callout will be enumerated
|
||||||
|
more than once, but with the same value for pattern_position in each
|
||||||
|
case.
|
||||||
|
|
||||||
|
The callback function should normally return zero. If it returns a non-
|
||||||
|
zero value, scanning the pattern stops, and that value is returned from
|
||||||
|
pcre2_callout_enumerate().
|
||||||
|
|
||||||
|
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
|
@ -3510,7 +3621,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 02 January 2015
|
Last updated: 23 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3585,13 +3696,12 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
|
||||||
during pattern matching. See the pcre2callout documentation for
|
during pattern matching. See the pcre2callout documentation for
|
||||||
details.
|
details.
|
||||||
|
|
||||||
8. Subpatterns that are called as subroutines (whether or not recur-
|
8. Subroutine calls (whether recursive or not) are treated as atomic
|
||||||
sively) are always treated as atomic groups in PCRE2. This is like
|
groups. Atomic recursion is like Python, but unlike Perl. Captured
|
||||||
Python, but unlike Perl. Captured values that are set outside a sub-
|
values that are set outside a subroutine call can be referenced from
|
||||||
routine call can be reference from inside in PCRE2, but not in Perl.
|
inside in PCRE2, but not in Perl. There is a discussion that explains
|
||||||
There is a discussion that explains these differences in more detail in
|
these differences in more detail in the section on recursion differ-
|
||||||
the section on recursion differences from Perl in the pcre2pattern
|
ences from Perl in the pcre2pattern page.
|
||||||
page.
|
|
||||||
|
|
||||||
9. If any of the backtracking control verbs are used in a subpattern
|
9. If any of the backtracking control verbs are used in a subpattern
|
||||||
that is called as a subroutine (whether or not recursively), their
|
that is called as a subroutine (whether or not recursively), their
|
||||||
|
@ -3696,8 +3806,8 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 28 September 2014
|
Last updated: 15 March 2015
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
.TH PCRE2_CALLOUT_ENUMERATE 3 "23 March 2015" "PCRE2 10.20"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
|
||||||
|
.B " void *\fIcallout_data\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function scans a compiled regular expression and calls the \fIcallback()\fP
|
||||||
|
function for each callout within the pattern. The yield of the function is zero
|
||||||
|
for success and non-zero otherwise. The arguments are:
|
||||||
|
.sp
|
||||||
|
\fIcode\fP Points to the compiled pattern
|
||||||
|
\fIcallback\fP The callback function
|
||||||
|
\fIcallout_data\fP User data that is passed to the callback
|
||||||
|
.sp
|
||||||
|
The \fIcallback()\fP function is passed a pointer to a data block containing
|
||||||
|
the following fields:
|
||||||
|
.sp
|
||||||
|
\fIversion\fP Block version number
|
||||||
|
\fIpattern_position\fP Offset to next item in pattern
|
||||||
|
\fInext_item_length\fP Length of next item in pattern
|
||||||
|
\fIcallout_number\fP Number for numbered callouts
|
||||||
|
\fIcallout_string_offset\fP Offset to string within pattern
|
||||||
|
\fIcallout_string_length\fP Length of callout string
|
||||||
|
\fIcallout_string\fP Points to callout string or is NULL
|
||||||
|
.sp
|
||||||
|
The second argument is the callout data that was passed to
|
||||||
|
\fBpcre2_callout_enumerate()\fP. The \fBcallback()\fP function must return zero
|
||||||
|
for success. Any other value causes the pattern scan to stop, with the value
|
||||||
|
being passed back as the result of \fBpcre2_callout_enumerate()\fP.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "23 January 2015" "PCRE2 10.10"
|
.TH PCRE2API 3 "23 March 2015" "PCRE2 10.20"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -234,6 +234,10 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.sp
|
.sp
|
||||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.sp
|
.sp
|
||||||
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
|
||||||
|
.B " void *\fIuser_data\fP);"
|
||||||
|
.sp
|
||||||
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre2_config(uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.fi
|
.fi
|
||||||
.
|
.
|
||||||
|
@ -1427,14 +1431,19 @@ can be processed in different locales.
|
||||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP, void *\fIwhere\fP);
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_pattern_info()\fP function returns information about a compiled
|
The \fBpcre2_pattern_info()\fP function returns general information about a
|
||||||
pattern. The first argument is a pointer to the compiled pattern. The second
|
compiled pattern. For information about callouts, see the
|
||||||
argument specifies which piece of information is required, and the third
|
.\" HTML <a href="#infoaboutcallouts">
|
||||||
argument is a pointer to a variable to receive the data. If the third argument
|
.\" </a>
|
||||||
is NULL, the first argument is ignored, and the function returns the size in
|
next section.
|
||||||
bytes of the variable that is required for the information requested.
|
.\"
|
||||||
Otherwise, The yield of the function is zero for success, or one of the
|
The first argument for \fBpcre2_pattern_info()\fP is a pointer to the compiled
|
||||||
following negative numbers:
|
pattern. The second argument specifies which piece of information is required,
|
||||||
|
and the third argument is a pointer to a variable to receive the data. If the
|
||||||
|
third argument is NULL, the first argument is ignored, and the function returns
|
||||||
|
the size in bytes of the variable that is required for the information
|
||||||
|
requested. Otherwise, the yield of the function is zero for success, or one of
|
||||||
|
the following negative numbers:
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ERROR_NULL the argument \fIcode\fP was NULL
|
PCRE2_ERROR_NULL the argument \fIcode\fP was NULL
|
||||||
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
PCRE2_ERROR_BADMAGIC the "magic number" was not found
|
||||||
|
@ -1716,6 +1725,31 @@ calculates the size has to over-estimate. Processing a pattern with the JIT
|
||||||
compiler does not alter the value returned by this option.
|
compiler does not alter the value returned by this option.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="infoaboutcallouts"></a>
|
||||||
|
.SH "INFORMATION ABOUT A PATTERN'S CALLOUTS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
|
||||||
|
.B " void *\fIuser_data\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
A script language that supports the use of string arguments in callouts might
|
||||||
|
like to scan all the callouts in a pattern before running the match. This can
|
||||||
|
be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a
|
||||||
|
pointer to a compiled pattern, the second points to a callback function, and
|
||||||
|
the third is arbitrary user data. The callback function is called for every
|
||||||
|
callout in the pattern in the order in which they appear. Its first argument is
|
||||||
|
a pointer to a callout enumeration block, and its second argument is the
|
||||||
|
\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. The
|
||||||
|
contents of the callout enumeration block are described in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2callout\fP
|
||||||
|
.\"
|
||||||
|
documentation, which also gives further details about callouts.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "SERIALIZATION AND PRECOMPILING"
|
.SH "SERIALIZATION AND PRECOMPILING"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -2275,8 +2309,8 @@ of the subject.
|
||||||
PCRE2_ERROR_CALLOUT
|
PCRE2_ERROR_CALLOUT
|
||||||
.sp
|
.sp
|
||||||
This error is never generated by \fBpcre2_match()\fP itself. It is provided for
|
This error is never generated by \fBpcre2_match()\fP itself. It is provided for
|
||||||
use by callout functions that want to cause \fBpcre2_match()\fP to return a
|
use by callout functions that want to cause \fBpcre2_match()\fP or
|
||||||
distinctive error code. See the
|
\fBpcre2_callout_enumerate()\fP to return a distinctive error code. See the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2callout\fP
|
\fBpcre2callout\fP
|
||||||
.\"
|
.\"
|
||||||
|
@ -2885,6 +2919,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 23 January 2015
|
Last updated: 23 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2CALLOUT 3 "16 March 2015" "PCRE2 10.20"
|
.TH PCRE2CALLOUT 3 "23 March 2015" "PCRE2 10.20"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -7,7 +7,13 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.B #include <pcre2.h>
|
.B #include <pcre2.h>
|
||||||
.PP
|
.PP
|
||||||
.SM
|
.SM
|
||||||
|
.nf
|
||||||
.B int (*pcre2_callout)(pcre2_callout_block *, void *);
|
.B int (*pcre2_callout)(pcre2_callout_block *, void *);
|
||||||
|
.sp
|
||||||
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
|
||||||
|
.B " void *\fIuser_data\fP);"
|
||||||
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
.rs
|
.rs
|
||||||
|
@ -170,6 +176,7 @@ option to \fBpcre2_compile()\fP, or by starting the pattern with
|
||||||
callouts such as the example above are obeyed.
|
callouts such as the example above are obeyed.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="calloutinterface"></a>
|
||||||
.SH "THE CALLOUT INTERFACE"
|
.SH "THE CALLOUT INTERFACE"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -199,7 +206,6 @@ documentation). The callout block structure contains the following fields:
|
||||||
PCRE2_SIZE \fIcallout_string_offset\fP;
|
PCRE2_SIZE \fIcallout_string_offset\fP;
|
||||||
PCRE2_SIZE \fIcallout_string_length\fP;
|
PCRE2_SIZE \fIcallout_string_length\fP;
|
||||||
PCRE2_SPTR \fIcallout_string\fP;
|
PCRE2_SPTR \fIcallout_string\fP;
|
||||||
|
|
||||||
.sp
|
.sp
|
||||||
The \fIversion\fP field contains the version number of the block format. The
|
The \fIversion\fP field contains the version number of the block format. The
|
||||||
current version is 1; the three callout string fields were added for this
|
current version is 1; the three callout string fields were added for this
|
||||||
|
@ -276,8 +282,8 @@ outside the recursion, as do the values of all captured substrings. If no
|
||||||
substrings have been captured, the value of \fIcapture_last\fP is 0. This is
|
substrings have been captured, the value of \fIcapture_last\fP is 0. This is
|
||||||
always the case for the DFA matching functions.
|
always the case for the DFA matching functions.
|
||||||
.P
|
.P
|
||||||
The \fIpattern_position\fP field contains the offset to the next item to be
|
The \fIpattern_position\fP field contains the offset in the pattern string to
|
||||||
matched in the pattern string.
|
the next item to be matched.
|
||||||
.P
|
.P
|
||||||
The \fInext_item_length\fP field contains the length of the next item to be
|
The \fInext_item_length\fP field contains the length of the next item to be
|
||||||
matched in the pattern string. When the callout immediately precedes an
|
matched in the pattern string. When the callout immediately precedes an
|
||||||
|
@ -298,7 +304,7 @@ of (*PRUNE) or (*THEN) without a name do not obliterate a previous (*MARK). In
|
||||||
callouts from the DFA matching function this field always contains NULL.
|
callouts from the DFA matching function this field always contains NULL.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "RETURN VALUES"
|
.SH "RETURN VALUES FROM CALLOUTS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The external callout function returns an integer to PCRE2. If the value is
|
The external callout function returns an integer to PCRE2. If the value is
|
||||||
|
@ -314,6 +320,54 @@ failure. The error number PCRE2_ERROR_CALLOUT is reserved for use by callout
|
||||||
functions; it will never be used by PCRE2 itself.
|
functions; it will never be used by PCRE2 itself.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SH "CALLOUT ENUMERATION"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
.B " int (*\fIcallback\fP)(pcre2_callout_enumerate_block *, void *),"
|
||||||
|
.B " void *\fIuser_data\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
A script language that supports the use of string arguments in callouts might
|
||||||
|
like to scan all the callouts in a pattern before running the match. This can
|
||||||
|
be done by calling \fBpcre2_callout_enumerate()\fP. The first argument is a
|
||||||
|
pointer to a compiled pattern, the second points to a callback function, and
|
||||||
|
the third is arbitrary user data. The callback function is called for every
|
||||||
|
callout in the pattern in the order in which they appear. Its first argument is
|
||||||
|
a pointer to a callout enumeration block, and its second argument is the
|
||||||
|
\fIuser_data\fP value that was passed to \fBpcre2_callout_enumerate()\fP. The
|
||||||
|
data block contains the following fields:
|
||||||
|
.sp
|
||||||
|
\fIversion\fP Block version number
|
||||||
|
\fIpattern_position\fP Offset to next item in pattern
|
||||||
|
\fInext_item_length\fP Length of next item in pattern
|
||||||
|
\fIcallout_number\fP Number for numbered callouts
|
||||||
|
\fIcallout_string_offset\fP Offset to string within pattern
|
||||||
|
\fIcallout_string_length\fP Length of callout string
|
||||||
|
\fIcallout_string\fP Points to callout string or is NULL
|
||||||
|
.sp
|
||||||
|
The version number is currently 0. It will increase if new fields are ever
|
||||||
|
added to the block. The remaining fields are the same as their namesakes in the
|
||||||
|
\fBpcre2_callout\fP block that is used for callouts during matching, as
|
||||||
|
described
|
||||||
|
.\" HTML <a href="#calloutinterface">
|
||||||
|
.\" </a>
|
||||||
|
above.
|
||||||
|
.\"
|
||||||
|
.P
|
||||||
|
Note that the value of \fIpattern_position\fP is unique for each callout.
|
||||||
|
However, if a callout occurs inside a group that is quantified with a non-zero
|
||||||
|
minimum or a fixed maximum, the group is replicated inside the compiled
|
||||||
|
pattern. For example, a pattern such as /(a){2}/ is compiled as if it were
|
||||||
|
/(a)(a)/. This means that the callout will be enumerated more than once, but
|
||||||
|
with the same value for \fIpattern_position\fP in each case.
|
||||||
|
.P
|
||||||
|
The callback function should normally return zero. If it returns a non-zero
|
||||||
|
value, scanning the pattern stops, and that value is returned from
|
||||||
|
\fBpcre2_callout_enumerate()\fP.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH AUTHOR
|
.SH AUTHOR
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -328,6 +382,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 16 March 2015
|
Last updated: 23 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "16 March 2015" "PCRE 10.20"
|
.TH PCRE2TEST 1 "22 March 2015" "PCRE 10.20"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -473,6 +473,7 @@ about the pattern:
|
||||||
.sp
|
.sp
|
||||||
bsr=[anycrlf|unicode] specify \eR handling
|
bsr=[anycrlf|unicode] specify \eR handling
|
||||||
/B bincode show binary code without lengths
|
/B bincode show binary code without lengths
|
||||||
|
callout_info show callout information
|
||||||
debug same as info,fullbincode
|
debug same as info,fullbincode
|
||||||
fullbincode show binary code with lengths
|
fullbincode show binary code with lengths
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
|
@ -549,6 +550,11 @@ if there is more than one they are listed as "starting code units". "Last code
|
||||||
unit" is the last literal code unit that must be present in any match. This is
|
unit" is the last literal code unit that must be present in any match. This is
|
||||||
not necessarily the last character. These lines are omitted if no starting or
|
not necessarily the last character. These lines are omitted if no starting or
|
||||||
ending code units are recorded.
|
ending code units are recorded.
|
||||||
|
.P
|
||||||
|
The \fBcallout_info\fP modifier requests information about all the callouts in
|
||||||
|
the pattern. A list of them is output at the end of any other information that
|
||||||
|
is requested. For each callout, either its number or string is given, followed
|
||||||
|
by the item that follows it in the pattern.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Specifying a pattern in hex"
|
.SS "Specifying a pattern in hex"
|
||||||
|
@ -1437,6 +1443,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 16 March 2015
|
Last updated: 22 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -59,11 +59,18 @@ INPUT ENCODING
|
||||||
|
|
||||||
Input to pcre2test is processed line by line, either by calling the C
|
Input to pcre2test is processed line by line, either by calling the C
|
||||||
library's fgets() function, or via the libreadline library (see below).
|
library's fgets() function, or via the libreadline library (see below).
|
||||||
In Unix-like environments, fgets() treats any bytes other than newline
|
The input is processed using C's string functions, so must not
|
||||||
as data characters. However, in some Windows environments character 26
|
contain binary zeroes, even though in Unix-like environments, fgets()
|
||||||
(hex 1A) causes an immediate end of file, and no further data is read.
|
treats any bytes other than newline as data characters. In some Windows
|
||||||
|
environments character 26 (hex 1A) causes an immediate end of file, and
|
||||||
|
no further data is read.
|
||||||
|
|
||||||
For maximum portability, therefore, it is safest to avoid non-printing
|
For maximum portability, therefore, it is safest to avoid non-printing
|
||||||
characters in pcre2test input files.
|
characters in pcre2test input files. There is a facility for specifying
|
||||||
|
a pattern's characters as hexadecimal pairs, thus making it possible to
|
||||||
|
include binary zeroes in a pattern for testing purposes. Subject lines
|
||||||
|
are processed for backslash escapes, which makes it possible to include
|
||||||
|
any data value.
|
||||||
|
|
||||||
|
|
||||||
COMMAND LINE OPTIONS
|
COMMAND LINE OPTIONS
|
||||||
|
@ -443,6 +450,7 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
bsr=[anycrlf|unicode] specify \R handling
|
bsr=[anycrlf|unicode] specify \R handling
|
||||||
/B bincode show binary code without lengths
|
/B bincode show binary code without lengths
|
||||||
|
callout_info show callout information
|
||||||
debug same as info,fullbincode
|
debug same as info,fullbincode
|
||||||
fullbincode show binary code with lengths
|
fullbincode show binary code with lengths
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
|
@ -518,6 +526,11 @@ PATTERN MODIFIERS
|
||||||
last character. These lines are omitted if no starting or ending code
|
last character. These lines are omitted if no starting or ending code
|
||||||
units are recorded.
|
units are recorded.
|
||||||
|
|
||||||
|
The callout_info modifier requests information about all the callouts
|
||||||
|
in the pattern. A list of them is output at the end of any other infor-
|
||||||
|
mation that is requested. For each callout, either its number or string
|
||||||
|
is given, followed by the item that follows it in the pattern.
|
||||||
|
|
||||||
Specifying a pattern in hex
|
Specifying a pattern in hex
|
||||||
|
|
||||||
The hex modifier specifies that the characters of the pattern are to be
|
The hex modifier specifies that the characters of the pattern are to be
|
||||||
|
@ -808,11 +821,15 @@ SUBJECT MODIFIERS
|
||||||
The callout_fail modifier can be given one or two numbers. If there is
|
The callout_fail modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that num-
|
only one number, 1 is returned instead of 0 when a callout of that num-
|
||||||
ber is reached. If two numbers are given, 1 is returned when callout
|
ber is reached. If two numbers are given, 1 is returned when callout
|
||||||
<n> is reached for the <m>th time.
|
<n> is reached for the <m>th time. Note that callouts with string argu-
|
||||||
|
ments are always given the number zero. See "Callouts" below for a
|
||||||
|
description of the output when a callout is taken.
|
||||||
|
|
||||||
The callout_data modifier can be given an unsigned or a negative num-
|
The callout_data modifier can be given an unsigned or a negative num-
|
||||||
ber. Any value other than zero is used as a return from pcre2test's
|
ber. This is set as the "user data" that is passed to the matching
|
||||||
callout function.
|
function, and passed back when the callout function is invoked. Any
|
||||||
|
value other than zero is used as a return from pcre2test's callout
|
||||||
|
function.
|
||||||
|
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
|
@ -1136,9 +1153,24 @@ RESTARTING AFTER A PARTIAL MATCH
|
||||||
CALLOUTS
|
CALLOUTS
|
||||||
|
|
||||||
If the pattern contains any callout requests, pcre2test's callout func-
|
If the pattern contains any callout requests, pcre2test's callout func-
|
||||||
tion is called during matching. This works with both matching func-
|
tion is called during matching unless callout_none is specified. This
|
||||||
tions. By default, the called function displays the callout number, the
|
works with both matching functions.
|
||||||
start and current positions in the text at the callout time, and the
|
|
||||||
|
The callout function in pcre2test returns zero (carry on matching) by
|
||||||
|
default, but you can use a callout_fail modifier in a subject line (as
|
||||||
|
described above) to change this and other parameters of the callout.
|
||||||
|
|
||||||
|
Inserting callouts can be helpful when using pcre2test to check compli-
|
||||||
|
cated regular expressions. For further information about callouts, see
|
||||||
|
the pcre2callout documentation.
|
||||||
|
|
||||||
|
The output for callouts with numerical arguments and those with string
|
||||||
|
arguments is slightly different.
|
||||||
|
|
||||||
|
Callouts with numerical arguments
|
||||||
|
|
||||||
|
By default, the callout function displays the callout number, the start
|
||||||
|
and current positions in the subject text at the callout time, and the
|
||||||
next pattern item to be tested. For example:
|
next pattern item to be tested. For example:
|
||||||
|
|
||||||
--->pqrabcdef
|
--->pqrabcdef
|
||||||
|
@ -1184,13 +1216,23 @@ CALLOUTS
|
||||||
backtracking, the mark reverts to being unset, the text "<unset>" is
|
backtracking, the mark reverts to being unset, the text "<unset>" is
|
||||||
output.
|
output.
|
||||||
|
|
||||||
The callout function in pcre2test returns zero (carry on matching) by
|
Callouts with string arguments
|
||||||
default, but you can use a callout_fail modifier in a subject line (as
|
|
||||||
described above) to change this and other parameters of the callout.
|
|
||||||
|
|
||||||
Inserting callouts can be helpful when using pcre2test to check compli-
|
The output for a callout with a string argument is similar, except that
|
||||||
cated regular expressions. For further information about callouts, see
|
instead of outputting a callout number before the position indicators,
|
||||||
the pcre2callout documentation.
|
the callout string and its offset in the pattern string are output
|
||||||
|
before the reflection of the subject string, and the subject string is
|
||||||
|
reflected for each callout. For example:
|
||||||
|
|
||||||
|
re> /^ab(?C'first')cd(?C"second")ef/
|
||||||
|
data> abcdefg
|
||||||
|
Callout (7): 'first'
|
||||||
|
--->abcdefg
|
||||||
|
^ ^ c
|
||||||
|
Callout (20): "second"
|
||||||
|
--->abcdefg
|
||||||
|
^ ^ e
|
||||||
|
0: abcdef
|
||||||
|
|
||||||
|
|
||||||
NON-PRINTING CHARACTERS
|
NON-PRINTING CHARACTERS
|
||||||
|
@ -1280,5 +1322,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 23 January 2015
|
Last updated: 22 March 2015
|
||||||
Copyright (c) 1997-2015 University of Cambridge.
|
Copyright (c) 1997-2015 University of Cambridge.
|
||||||
|
|
|
@ -342,7 +342,19 @@ typedef struct pcre2_callout_block { \
|
||||||
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
/* ------------------------------------------------------------------ */ \
|
/* ------------------------------------------------------------------ */ \
|
||||||
} pcre2_callout_block;
|
} pcre2_callout_block; \
|
||||||
|
\
|
||||||
|
typedef struct pcre2_callout_enumerate_block { \
|
||||||
|
uint32_t version; /* Identifies version of block */ \
|
||||||
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_offset; /* Offset to string within pattern */ \
|
||||||
|
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
|
||||||
|
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
|
||||||
|
/* ------------------------------------------------------------------ */ \
|
||||||
|
} pcre2_callout_enumerate_block;
|
||||||
|
|
||||||
|
|
||||||
/* List the generic forms of all other functions in macros, which will be
|
/* List the generic forms of all other functions in macros, which will be
|
||||||
|
@ -410,6 +422,9 @@ PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||||
|
|
||||||
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
#define PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
||||||
|
void *); \
|
||||||
|
PCRE2_EXP_DECL int pcre2_callout_enumerate(const pcre2_code *, \
|
||||||
|
int (*)(pcre2_callout_enumerate_block *, void *), \
|
||||||
void *);
|
void *);
|
||||||
|
|
||||||
|
|
||||||
|
@ -539,6 +554,7 @@ pcre2_compile are called by application code. */
|
||||||
/* Data blocks */
|
/* Data blocks */
|
||||||
|
|
||||||
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
|
||||||
|
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
|
||||||
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
|
||||||
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
|
||||||
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
#define pcre2_match_context PCRE2_SUFFIX(pcre2_match_context_)
|
||||||
|
@ -547,6 +563,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
/* Functions: the complete list in alphabetical order */
|
/* Functions: the complete list in alphabetical order */
|
||||||
|
|
||||||
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
@ -554,7 +571,6 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
#define pcre2_compile_context_free PCRE2_SUFFIX(pcre2_compile_context_free_)
|
||||||
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
#define pcre2_config PCRE2_SUFFIX(pcre2_config_)
|
||||||
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
#define pcre2_dfa_match PCRE2_SUFFIX(pcre2_dfa_match_)
|
||||||
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
|
||||||
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
#define pcre2_general_context_copy PCRE2_SUFFIX(pcre2_general_context_copy_)
|
||||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||||
|
@ -570,6 +586,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||||
|
#define pcre2_match PCRE2_SUFFIX(pcre2_match_)
|
||||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||||
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
#define pcre2_match_context_create PCRE2_SUFFIX(pcre2_match_context_create_)
|
||||||
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
#define pcre2_match_context_free PCRE2_SUFFIX(pcre2_match_context_free_)
|
||||||
|
|
|
@ -225,4 +225,181 @@ switch(what)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Callout enumerator *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Arguments:
|
||||||
|
code points to compiled code
|
||||||
|
callback function called for each callout block
|
||||||
|
callout_data user data passed to the callback
|
||||||
|
|
||||||
|
Returns: 0 when successfully completed
|
||||||
|
< 0 on local error
|
||||||
|
!= 0 for callback error
|
||||||
|
*/
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_callout_enumerate(const pcre2_code *code,
|
||||||
|
int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
|
||||||
|
{
|
||||||
|
pcre2_real_code *re = (pcre2_real_code *)code;
|
||||||
|
pcre2_callout_enumerate_block cb;
|
||||||
|
PCRE2_SPTR cc;
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (re == NULL) return PCRE2_ERROR_NULL;
|
||||||
|
|
||||||
|
/* Check that the first field in the block is the magic number. If it is not,
|
||||||
|
return with PCRE2_ERROR_BADMAGIC. */
|
||||||
|
|
||||||
|
if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
|
||||||
|
|
||||||
|
/* Check that this pattern was compiled in the correct bit mode */
|
||||||
|
|
||||||
|
if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
|
cb.version = 0;
|
||||||
|
cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
|
||||||
|
+ re->name_count * re->name_entry_size;
|
||||||
|
|
||||||
|
while (TRUE)
|
||||||
|
{
|
||||||
|
int rc;
|
||||||
|
switch (*cc)
|
||||||
|
{
|
||||||
|
case OP_END:
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
case OP_CHAR:
|
||||||
|
case OP_CHARI:
|
||||||
|
case OP_NOT:
|
||||||
|
case OP_NOTI:
|
||||||
|
case OP_STAR:
|
||||||
|
case OP_MINSTAR:
|
||||||
|
case OP_PLUS:
|
||||||
|
case OP_MINPLUS:
|
||||||
|
case OP_QUERY:
|
||||||
|
case OP_MINQUERY:
|
||||||
|
case OP_UPTO:
|
||||||
|
case OP_MINUPTO:
|
||||||
|
case OP_EXACT:
|
||||||
|
case OP_POSSTAR:
|
||||||
|
case OP_POSPLUS:
|
||||||
|
case OP_POSQUERY:
|
||||||
|
case OP_POSUPTO:
|
||||||
|
case OP_STARI:
|
||||||
|
case OP_MINSTARI:
|
||||||
|
case OP_PLUSI:
|
||||||
|
case OP_MINPLUSI:
|
||||||
|
case OP_QUERYI:
|
||||||
|
case OP_MINQUERYI:
|
||||||
|
case OP_UPTOI:
|
||||||
|
case OP_MINUPTOI:
|
||||||
|
case OP_EXACTI:
|
||||||
|
case OP_POSSTARI:
|
||||||
|
case OP_POSPLUSI:
|
||||||
|
case OP_POSQUERYI:
|
||||||
|
case OP_POSUPTOI:
|
||||||
|
case OP_NOTSTAR:
|
||||||
|
case OP_NOTMINSTAR:
|
||||||
|
case OP_NOTPLUS:
|
||||||
|
case OP_NOTMINPLUS:
|
||||||
|
case OP_NOTQUERY:
|
||||||
|
case OP_NOTMINQUERY:
|
||||||
|
case OP_NOTUPTO:
|
||||||
|
case OP_NOTMINUPTO:
|
||||||
|
case OP_NOTEXACT:
|
||||||
|
case OP_NOTPOSSTAR:
|
||||||
|
case OP_NOTPOSPLUS:
|
||||||
|
case OP_NOTPOSQUERY:
|
||||||
|
case OP_NOTPOSUPTO:
|
||||||
|
case OP_NOTSTARI:
|
||||||
|
case OP_NOTMINSTARI:
|
||||||
|
case OP_NOTPLUSI:
|
||||||
|
case OP_NOTMINPLUSI:
|
||||||
|
case OP_NOTQUERYI:
|
||||||
|
case OP_NOTMINQUERYI:
|
||||||
|
case OP_NOTUPTOI:
|
||||||
|
case OP_NOTMINUPTOI:
|
||||||
|
case OP_NOTEXACTI:
|
||||||
|
case OP_NOTPOSSTARI:
|
||||||
|
case OP_NOTPOSPLUSI:
|
||||||
|
case OP_NOTPOSQUERYI:
|
||||||
|
case OP_NOTPOSUPTOI:
|
||||||
|
cc += PRIV(OP_lengths)[*cc];
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_TYPESTAR:
|
||||||
|
case OP_TYPEMINSTAR:
|
||||||
|
case OP_TYPEPLUS:
|
||||||
|
case OP_TYPEMINPLUS:
|
||||||
|
case OP_TYPEQUERY:
|
||||||
|
case OP_TYPEMINQUERY:
|
||||||
|
case OP_TYPEUPTO:
|
||||||
|
case OP_TYPEMINUPTO:
|
||||||
|
case OP_TYPEEXACT:
|
||||||
|
case OP_TYPEPOSSTAR:
|
||||||
|
case OP_TYPEPOSPLUS:
|
||||||
|
case OP_TYPEPOSQUERY:
|
||||||
|
case OP_TYPEPOSUPTO:
|
||||||
|
cc += PRIV(OP_lengths)[*cc];
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
|
||||||
|
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
|
case OP_XCLASS:
|
||||||
|
cc += GET(cc, 1);
|
||||||
|
break;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
case OP_MARK:
|
||||||
|
case OP_PRUNE_ARG:
|
||||||
|
case OP_SKIP_ARG:
|
||||||
|
case OP_THEN_ARG:
|
||||||
|
cc += PRIV(OP_lengths)[*cc] + cc[1];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CALLOUT:
|
||||||
|
cb.pattern_position = GET(cc, 1);
|
||||||
|
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||||
|
cb.callout_number = cc[1 + 2*LINK_SIZE];
|
||||||
|
cb.callout_string_offset = 0;
|
||||||
|
cb.callout_string_length = 0;
|
||||||
|
cb.callout_string = NULL;
|
||||||
|
rc = callback(&cb, callout_data);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
cc += PRIV(OP_lengths)[*cc];
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OP_CALLOUT_STR:
|
||||||
|
cb.pattern_position = GET(cc, 1);
|
||||||
|
cb.next_item_length = GET(cc, 1 + LINK_SIZE);
|
||||||
|
cb.callout_number = 0;
|
||||||
|
cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
|
||||||
|
cb.callout_string_length =
|
||||||
|
GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
|
||||||
|
cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
|
||||||
|
rc = callback(&cb, callout_data);
|
||||||
|
if (rc != 0) return rc;
|
||||||
|
cc += GET(cc, 1 + 2*LINK_SIZE);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
cc += PRIV(OP_lengths)[*cc];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* End of pcre2_pattern_info.c */
|
/* End of pcre2_pattern_info.c */
|
||||||
|
|
139
src/pcre2test.c
139
src/pcre2test.c
|
@ -382,28 +382,29 @@ either on a pattern or a data line, so they must all be distinct. */
|
||||||
#define CTL_ALTGLOBAL 0x00000010u
|
#define CTL_ALTGLOBAL 0x00000010u
|
||||||
#define CTL_BINCODE 0x00000020u
|
#define CTL_BINCODE 0x00000020u
|
||||||
#define CTL_CALLOUT_CAPTURE 0x00000040u
|
#define CTL_CALLOUT_CAPTURE 0x00000040u
|
||||||
#define CTL_CALLOUT_NONE 0x00000080u
|
#define CTL_CALLOUT_INFO 0x00000080u
|
||||||
#define CTL_DFA 0x00000100u
|
#define CTL_CALLOUT_NONE 0x00000100u
|
||||||
#define CTL_FINDLIMITS 0x00000200u
|
#define CTL_DFA 0x00000200u
|
||||||
#define CTL_FULLBINCODE 0x00000400u
|
#define CTL_FINDLIMITS 0x00000400u
|
||||||
#define CTL_GETALL 0x00000800u
|
#define CTL_FULLBINCODE 0x00000800u
|
||||||
#define CTL_GLOBAL 0x00001000u
|
#define CTL_GETALL 0x00001000u
|
||||||
#define CTL_HEXPAT 0x00002000u
|
#define CTL_GLOBAL 0x00002000u
|
||||||
#define CTL_INFO 0x00004000u
|
#define CTL_HEXPAT 0x00004000u
|
||||||
#define CTL_JITFAST 0x00008000u
|
#define CTL_INFO 0x00008000u
|
||||||
#define CTL_JITVERIFY 0x00010000u
|
#define CTL_JITFAST 0x00010000u
|
||||||
#define CTL_MARK 0x00020000u
|
#define CTL_JITVERIFY 0x00020000u
|
||||||
#define CTL_MEMORY 0x00040000u
|
#define CTL_MARK 0x00040000u
|
||||||
#define CTL_POSIX 0x00080000u
|
#define CTL_MEMORY 0x00080000u
|
||||||
#define CTL_PUSH 0x00100000u
|
#define CTL_POSIX 0x00100000u
|
||||||
#define CTL_STARTCHAR 0x00200000u
|
#define CTL_PUSH 0x00200000u
|
||||||
#define CTL_ZERO_TERMINATE 0x00400000u
|
#define CTL_STARTCHAR 0x00400000u
|
||||||
|
#define CTL_ZERO_TERMINATE 0x00800000u
|
||||||
|
|
||||||
#define CTL_BSR_SET 0x80000000u /* This is informational */
|
#define CTL_BSR_SET 0x80000000u /* This is informational */
|
||||||
#define CTL_NL_SET 0x40000000u /* This is informational */
|
#define CTL_NL_SET 0x40000000u /* This is informational */
|
||||||
|
|
||||||
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
||||||
#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE) /* For testing */
|
#define CTL_ANYINFO (CTL_DEBUG|CTL_BINCODE|CTL_CALLOUT_INFO)
|
||||||
#define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL)
|
#define CTL_ANYGLOB (CTL_ALTGLOBAL|CTL_GLOBAL)
|
||||||
|
|
||||||
/* These are all the controls that may be set either on a pattern or on a
|
/* These are all the controls that may be set either on a pattern or on a
|
||||||
|
@ -494,6 +495,7 @@ static modstruct modlist[] = {
|
||||||
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
|
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
|
||||||
{ "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
|
{ "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
|
||||||
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
|
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
|
||||||
|
{ "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
|
||||||
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
|
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
|
||||||
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
|
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
|
||||||
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
|
{ "copy", MOD_DAT, MOD_NN, DO(copy_numbers), DO(copy_names) },
|
||||||
|
@ -578,8 +580,8 @@ static modstruct modlist[] = {
|
||||||
/* Control bits that are not ignored with 'push'. */
|
/* Control bits that are not ignored with 'push'. */
|
||||||
|
|
||||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
||||||
CTL_BINCODE|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO|CTL_JITVERIFY| \
|
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
||||||
CTL_MEMORY|CTL_PUSH|CTL_BSR_SET|CTL_NL_SET)
|
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_BSR_SET|CTL_NL_SET)
|
||||||
|
|
||||||
/* Controls that apply only at compile time with 'push'. */
|
/* Controls that apply only at compile time with 'push'. */
|
||||||
|
|
||||||
|
@ -841,6 +843,17 @@ are supported. */
|
||||||
else \
|
else \
|
||||||
(void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
(void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
||||||
|
|
||||||
|
/* Three-mode version: test_mode selects the 8-bit, 16-bit, or 32-bit
pcre2_callout_enumerate_N() function. The callback b is cast to the callback
type for that width, c is passed through as user data, and the result is
assigned to a. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
|
||||||
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
a = pcre2_callout_enumerate_8(compiled_code8, \
|
||||||
|
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c); \
|
||||||
|
else if (test_mode == PCRE16_MODE) \
|
||||||
|
a = pcre2_callout_enumerate_16(compiled_code16, \
|
||||||
|
(int(*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c); \
|
||||||
|
else \
|
||||||
|
a = pcre2_callout_enumerate_32(compiled_code32, \
|
||||||
|
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
|
||||||
|
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8)); \
|
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8)); \
|
||||||
|
@ -1268,6 +1281,14 @@ the three different cases. */
|
||||||
else \
|
else \
|
||||||
(void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
|
(void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f)
|
||||||
|
|
||||||
|
/* Two-mode version: test_mode selects between the BITONE and BITTWO widths.
The callback b is cast to the callback type for the chosen width, c is passed
through as user data, and the result is assigned to a. The function name stem
must end in an underscore, as in G(pcre2_compile_,BITONE), so that pasting the
width yields pcre2_callout_enumerate_<width>, the name that the other versions
of this macro call. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
  if (test_mode == G(G(PCRE,BITONE),_MODE)) \
     a = G(pcre2_callout_enumerate_,BITONE)(G(compiled_code,BITONE), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITONE) *, void *))b,c); \
  else \
     a = G(pcre2_callout_enumerate_,BITTWO)(G(compiled_code,BITTWO), \
       (int (*)(struct G(pcre2_callout_enumerate_block_,BITTWO) *, void *))b,c)
|
||||||
|
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,G(g,BITONE)); \
|
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,G(g,BITONE)); \
|
||||||
|
@ -1588,6 +1609,9 @@ the three different cases. */
|
||||||
lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
||||||
#define PCHARSV(p, offset, len, utf, f) \
|
#define PCHARSV(p, offset, len, utf, f) \
|
||||||
(void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
(void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f)
|
||||||
|
/* 8-bit only version: calls pcre2_callout_enumerate_8() on compiled_code8 with
callback b cast to the 8-bit callback type and user data c; the result is
assigned to a. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
|
||||||
|
a = pcre2_callout_enumerate_8(compiled_code8, \
|
||||||
|
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8))
|
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,G(g,8))
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -1676,6 +1700,9 @@ the three different cases. */
|
||||||
lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
|
lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
|
||||||
#define PCHARSV(p, offset, len, utf, f) \
|
#define PCHARSV(p, offset, len, utf, f) \
|
||||||
(void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
|
(void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f)
|
||||||
|
/* 16-bit only version: calls pcre2_callout_enumerate_16() on compiled_code16
with callback b cast to the 16-bit callback type and user data c; the result is
assigned to a. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
|
||||||
|
a = pcre2_callout_enumerate_16(compiled_code16, \
|
||||||
|
(int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16))
|
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,G(g,16))
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -1764,6 +1791,9 @@ the three different cases. */
|
||||||
lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
|
lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
|
||||||
#define PCHARSV(p, offset, len, utf, f) \
|
#define PCHARSV(p, offset, len, utf, f) \
|
||||||
(void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
|
(void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f)
|
||||||
|
/* 32-bit only version: calls pcre2_callout_enumerate_32() on compiled_code32
with callback b cast to the 32-bit callback type and user data c; the result is
assigned to a. */
#define PCRE2_CALLOUT_ENUMERATE(a,b,c) \
|
||||||
|
a = pcre2_callout_enumerate_32(compiled_code32, \
|
||||||
|
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32))
|
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,G(g,32))
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -3381,7 +3411,7 @@ Returns: nothing
|
||||||
static void
|
static void
|
||||||
show_controls(uint32_t controls, const char *before)
|
show_controls(uint32_t controls, const char *before)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||||
|
@ -3390,6 +3420,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
||||||
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
||||||
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
||||||
|
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
|
||||||
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
||||||
((controls & CTL_DFA) != 0)? " dfa" : "",
|
((controls & CTL_DFA) != 0)? " dfa" : "",
|
||||||
((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
|
((controls & CTL_FINDLIMITS) != 0)? " find_limits" : "",
|
||||||
|
@ -3517,6 +3548,56 @@ if (pat_patctl.jit != 0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Callback function for callout enumeration *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* The only differences in the callout enumeration block for different code
|
||||||
|
unit widths are that the pointers to the subject, the most recent MARK, and a
|
||||||
|
callout argument string point to strings of the appropriate width. Casts can be
|
||||||
|
used to deal with this.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
cb pointer to enumerate block
|
||||||
|
callout_data user data
|
||||||
|
|
||||||
|
Returns: 0
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int callout_callback(pcre2_callout_enumerate_block_8 *cb,
|
||||||
|
void *callout_data)
|
||||||
|
{
|
||||||
|
uint32_t i;
|
||||||
|
BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0;
|
||||||
|
|
||||||
|
(void)callout_data; /* Not currently displayed */
|
||||||
|
|
||||||
|
fprintf(outfile, "Callout ");
|
||||||
|
if (cb->callout_string != NULL)
|
||||||
|
{
|
||||||
|
uint32_t delimiter = CODE_UNIT(cb->callout_string, -1);
|
||||||
|
fprintf(outfile, "%c", delimiter);
|
||||||
|
PCHARSV(cb->callout_string, 0,
|
||||||
|
cb->callout_string_length, utf, outfile);
|
||||||
|
for (i = 0; callout_start_delims[i] != 0; i++)
|
||||||
|
if (delimiter == callout_start_delims[i])
|
||||||
|
{
|
||||||
|
delimiter = callout_end_delims[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
fprintf(outfile, "%c ", delimiter);
|
||||||
|
}
|
||||||
|
else fprintf(outfile, "%d ", cb->callout_number);
|
||||||
|
|
||||||
|
fprintf(outfile, "%.*s\n",
|
||||||
|
(int)((cb->next_item_length == 0)? 1 : cb->next_item_length),
|
||||||
|
pbuffer8 + cb->pattern_position);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Show information about a pattern *
|
* Show information about a pattern *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
@ -3789,6 +3870,24 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0)
|
||||||
|
{
|
||||||
|
int errorcode;
|
||||||
|
PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0);
|
||||||
|
if (errorcode != 0)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode);
|
||||||
|
if (errorcode < 0)
|
||||||
|
{
|
||||||
|
PCRE2_GET_ERROR_MESSAGE(len, errorcode, pbuffer);
|
||||||
|
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, outfile);
|
||||||
|
}
|
||||||
|
fprintf(outfile, "\n");
|
||||||
|
return PR_SKIP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return PR_OK;
|
return PR_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4206,11 +4206,11 @@ a random value. /Ix
|
||||||
/^a(b)c(?C{AB})def/B
|
/^a(b)c(?C{AB})def/B
|
||||||
abcdef\=callout_capture
|
abcdef\=callout_capture
|
||||||
|
|
||||||
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
|
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info
|
||||||
|
|
||||||
/(?:a(?C`code`)){3}/B
|
/(?:a(?C`code`)){3}/B
|
||||||
|
|
||||||
/^(?(?C25)(?=abc)abcd|xyz)/B
|
/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info
|
||||||
abcdefg
|
abcdefg
|
||||||
xyz123
|
xyz123
|
||||||
|
|
||||||
|
@ -4226,7 +4226,7 @@ a random value. /Ix
|
||||||
|
|
||||||
# Binary zero in callout string
|
# Binary zero in callout string
|
||||||
# a ( ? C ' x z ' ) b
|
# a ( ? C ' x z ' ) b
|
||||||
/ 61 28 3f 43 27 78 00 7a 27 29 62/hex
|
/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info
|
||||||
abcdefgh
|
abcdefgh
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
|
|
|
@ -14060,7 +14060,7 @@ Callout (10): {AB} last capture = 1
|
||||||
0: abcdef
|
0: abcdef
|
||||||
1: b
|
1: b
|
||||||
|
|
||||||
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B
|
/(?C`a``b`)(?C'a''b')(?C"a""b")(?C^a^^b^)(?C%a%%b%)(?C#a##b#)(?C$a$$b$)(?C{a}}b})/B,callout_info
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
CalloutStr `a`b` 4 10 0
|
CalloutStr `a`b` 4 10 0
|
||||||
|
@ -14074,6 +14074,14 @@ Callout (10): {AB} last capture = 1
|
||||||
Ket
|
Ket
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
Callout `a`b` (
|
||||||
|
Callout 'a'b' (
|
||||||
|
Callout "a"b" (
|
||||||
|
Callout ^a^b^ (
|
||||||
|
Callout %a%b% (
|
||||||
|
Callout #a#b# (
|
||||||
|
Callout $a$b$ (
|
||||||
|
Callout {a}b}
|
||||||
|
|
||||||
/(?:a(?C`code`)){3}/B
|
/(?:a(?C`code`)){3}/B
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
@ -14094,7 +14102,7 @@ Callout (10): {AB} last capture = 1
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
/^(?(?C25)(?=abc)abcd|xyz)/B
|
/^(?(?C25)(?=abc)abcd|xyz)/B,callout_info
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
Bra
|
Bra
|
||||||
^
|
^
|
||||||
|
@ -14110,6 +14118,7 @@ Callout (10): {AB} last capture = 1
|
||||||
Ket
|
Ket
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
Callout 25 (?=abc)
|
||||||
abcdefg
|
abcdefg
|
||||||
--->abcdefg
|
--->abcdefg
|
||||||
25 ^ (?=abc)
|
25 ^ (?=abc)
|
||||||
|
@ -14171,7 +14180,8 @@ Callout (8): `code`
|
||||||
|
|
||||||
# Binary zero in callout string
|
# Binary zero in callout string
|
||||||
# a ( ? C ' x z ' ) b
|
# a ( ? C ' x z ' ) b
|
||||||
/ 61 28 3f 43 27 78 00 7a 27 29 62/hex
|
/ 61 28 3f 43 27 78 00 7a 27 29 62/hex,callout_info
|
||||||
|
Callout 'x\x00z' b
|
||||||
abcdefgh
|
abcdefgh
|
||||||
Callout (5): 'x\x00z'
|
Callout (5): 'x\x00z'
|
||||||
--->abcdefgh
|
--->abcdefgh
|
||||||
|
|
Loading…
Reference in New Issue