Substitute code update and documentation.
This commit is contained in:
parent
bb34dede56
commit
b3ac0ffb32
|
@ -63,6 +63,7 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2_set_parens_nest_limit.html \
|
doc/html/pcre2_set_parens_nest_limit.html \
|
||||||
doc/html/pcre2_set_recursion_limit.html \
|
doc/html/pcre2_set_recursion_limit.html \
|
||||||
doc/html/pcre2_set_recursion_memory_management.html \
|
doc/html/pcre2_set_recursion_memory_management.html \
|
||||||
|
doc/html/pcre2_substitute.html \
|
||||||
doc/html/pcre2_substring_copy_byname.html \
|
doc/html/pcre2_substring_copy_byname.html \
|
||||||
doc/html/pcre2_substring_copy_bynumber.html \
|
doc/html/pcre2_substring_copy_bynumber.html \
|
||||||
doc/html/pcre2_substring_free.html \
|
doc/html/pcre2_substring_free.html \
|
||||||
|
@ -134,6 +135,7 @@ dist_man_MANS = \
|
||||||
doc/pcre2_set_parens_nest_limit.3 \
|
doc/pcre2_set_parens_nest_limit.3 \
|
||||||
doc/pcre2_set_recursion_limit.3 \
|
doc/pcre2_set_recursion_limit.3 \
|
||||||
doc/pcre2_set_recursion_memory_management.3 \
|
doc/pcre2_set_recursion_memory_management.3 \
|
||||||
|
doc/pcre2_substitute.3 \
|
||||||
doc/pcre2_substring_copy_byname.3 \
|
doc/pcre2_substring_copy_byname.3 \
|
||||||
doc/pcre2_substring_copy_bynumber.3 \
|
doc/pcre2_substring_copy_bynumber.3 \
|
||||||
doc/pcre2_substring_free.3 \
|
doc/pcre2_substring_free.3 \
|
||||||
|
|
|
@ -204,6 +204,10 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
||||||
<td> Set match recursion memory management</td></tr>
|
<td> Set match recursion memory management</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
|
||||||
|
<td> Match a compiled pattern to a subject string and do
|
||||||
|
substitutions</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
|
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
|
||||||
<td> Extract named substring into given buffer</td></tr>
|
<td> Extract named substring into given buffer</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -70,8 +70,8 @@ available codes are:
|
||||||
</pre>
|
</pre>
|
||||||
The function yields a non-negative value on success or the negative value
|
The function yields a non-negative value on success or the negative value
|
||||||
PCRE2_ERROR_BADOPTION otherwise. This is also the result for the
|
PCRE2_ERROR_BADOPTION otherwise. This is also the result for the
|
||||||
PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string
|
PCRE2_CONFIG_JITTARGET code if JIT support is not available. When a string is
|
||||||
is returned the yield is the length of the string, in code units, excluding the
|
requested, the function returns the number of code units used, including the
|
||||||
terminating zero.
|
terminating zero.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -22,7 +22,7 @@ SYNOPSIS
|
||||||
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>, pcre2_jit_stack *<i>jit_stack</i>);</b>
|
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -33,11 +33,7 @@ processed by the JIT compiler against a given subject string, using a matching
|
||||||
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
|
algorithm that is similar to Perl's. It is a "fast path" interface to JIT, and
|
||||||
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
|
it bypasses some of the sanity checks that <b>pcre2_match()</b> applies.
|
||||||
Its arguments are exactly the same as for
|
Its arguments are exactly the same as for
|
||||||
<a href="pcre2_match.html"><b>pcre2_match()</b></a>
|
<a href="pcre2_match.html"><b>pcre2_match()</b></a>.
|
||||||
plus one additional argument that must either point to a JIT stack or be NULL.
|
|
||||||
In the latter case, if a callback function has been set up by
|
|
||||||
<b>pcre2_jit_stack_create()</b>, it is called. Otherwise the system stack is
|
|
||||||
used.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||||
|
|
|
@ -19,18 +19,20 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>void pcre2_jit_stack_assign(const pcre2_code *<i>code</i>,</b>
|
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function provides control over the memory used as a stack at run-time by a
|
This function provides control over the memory used by JIT as a run-time stack
|
||||||
call to <b>pcre2_match()</b> or <b>pcre2_jit_match()</b> with a pattern that has
|
when <b>pcre2_match()</b> or <b>pcre2_jit_match()</b> is called with a pattern
|
||||||
been successfully processed by the JIT compiler. The arguments are:
|
that has been successfully processed by the JIT compiler. The information that
|
||||||
|
determines which stack is used is put into a match context that is subsequently
|
||||||
|
passed to a matching function. The arguments of this function are:
|
||||||
<pre>
|
<pre>
|
||||||
code the pointer returned by <b>pcre2_compile()</b>
|
mcontext a pointer to a match context
|
||||||
callback a callback function
|
callback a callback function
|
||||||
callback_data a JIT stack or a value to be passed to the callback
|
callback_data a JIT stack or a value to be passed to the callback
|
||||||
</PRE>
|
</PRE>
|
||||||
|
@ -51,7 +53,7 @@ result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||||
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You may safely assign the same JIT stack to multiple patterns, as long as they
|
You may safely use the same JIT stack for multiple patterns, as long as they
|
||||||
are all matched in the same thread. In a multithread application, each thread
|
are all matched in the same thread. In a multithread application, each thread
|
||||||
must use its own JIT stack. For more details, see the
|
must use its own JIT stack. For more details, see the
|
||||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||||
|
|
|
@ -19,7 +19,7 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data_create_from_pattern(pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre2_substitute specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre2_substitute man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
|
please consult the man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include <pcre2.h></b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
|
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||||
|
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function matches a compiled regular expression against a given subject
|
||||||
|
string, using a matching algorithm that is similar to Perl's. It then makes a
|
||||||
|
copy of the subject, substituting a replacement string for what was matched.
|
||||||
|
Its arguments are:
|
||||||
|
<pre>
|
||||||
|
<i>code</i> Points to the compiled pattern
|
||||||
|
<i>subject</i> Points to the subject string
|
||||||
|
<i>length</i> Length of the subject string
|
||||||
|
<i>startoffset</i> Offset in the subject at which to start matching
|
||||||
|
<i>options</i> Option bits
|
||||||
|
<i>match_data</i> Points to a match data block, or is NULL
|
||||||
|
<i>mcontext</i> Points to a match context, or is NULL
|
||||||
|
<i>replacement</i> Points to the replacement string
|
||||||
|
<i>rlength</i> Length of the replacement string
|
||||||
|
<i>outputbuffer</i> Points to the output buffer
|
||||||
|
<i>outlengthptr</i> Points to the length of the output buffer
|
||||||
|
</pre>
|
||||||
|
A match context is needed only if you want to:
|
||||||
|
<pre>
|
||||||
|
Set up a callout function
|
||||||
|
Change the limit for calling the internal function <i>match()</i>
|
||||||
|
Change the limit for calling <i>match()</i> recursively
|
||||||
|
Set custom memory management when the heap is used for recursion
|
||||||
|
</pre>
|
||||||
|
The <i>length</i>, <i>startoffset</i> and <i>rlength</i> values are code
|
||||||
|
units, not characters, as are the contents of the variable pointed at by
|
||||||
|
<i>outlengthptr</i>, which is updated to the actual length of the new string.
|
||||||
|
The options are:
|
||||||
|
<pre>
|
||||||
|
PCRE2_ANCHORED Match only at the first position
|
||||||
|
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||||
|
PCRE2_NOTEOL Subject string is not the end of a line
|
||||||
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
|
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
|
is not a valid match
|
||||||
|
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
|
||||||
|
UTF validity (only relevant if PCRE2_UTF
|
||||||
|
was set at compile time)
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||||
|
</pre>
|
||||||
|
The function returns the number of substitutions, which may be zero if there
|
||||||
|
were no matches. The result can be greater than one only when
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
|
@ -19,32 +19,34 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC4" href="#SEC4">PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS</a>
|
<li><a name="TOC4" href="#SEC4">PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS</a>
|
||||||
<li><a name="TOC5" href="#SEC5">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a>
|
<li><a name="TOC5" href="#SEC5">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a>
|
||||||
<li><a name="TOC6" href="#SEC6">PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS</a>
|
<li><a name="TOC6" href="#SEC6">PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS</a>
|
||||||
<li><a name="TOC7" href="#SEC7">PCRE2 NATIVE API JIT FUNCTIONS</a>
|
<li><a name="TOC7" href="#SEC7">PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION</a>
|
||||||
<li><a name="TOC8" href="#SEC8">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
|
<li><a name="TOC8" href="#SEC8">PCRE2 NATIVE API JIT FUNCTIONS</a>
|
||||||
<li><a name="TOC9" href="#SEC9">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
|
||||||
<li><a name="TOC10" href="#SEC10">PCRE2 API OVERVIEW</a>
|
<li><a name="TOC10" href="#SEC10">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC11" href="#SEC11">NEWLINES</a>
|
<li><a name="TOC11" href="#SEC11">PCRE2 API OVERVIEW</a>
|
||||||
<li><a name="TOC12" href="#SEC12">MULTITHREADING</a>
|
<li><a name="TOC12" href="#SEC12">NEWLINES</a>
|
||||||
<li><a name="TOC13" href="#SEC13">PCRE2 CONTEXTS</a>
|
<li><a name="TOC13" href="#SEC13">MULTITHREADING</a>
|
||||||
<li><a name="TOC14" href="#SEC14">CHECKING BUILD-TIME OPTIONS</a>
|
<li><a name="TOC14" href="#SEC14">PCRE2 CONTEXTS</a>
|
||||||
<li><a name="TOC15" href="#SEC15">COMPILING A PATTERN</a>
|
<li><a name="TOC15" href="#SEC15">CHECKING BUILD-TIME OPTIONS</a>
|
||||||
<li><a name="TOC16" href="#SEC16">COMPILATION ERROR CODES</a>
|
<li><a name="TOC16" href="#SEC16">COMPILING A PATTERN</a>
|
||||||
<li><a name="TOC17" href="#SEC17">JUST-IN-TIME (JIT) COMPILATION</a>
|
<li><a name="TOC17" href="#SEC17">COMPILATION ERROR CODES</a>
|
||||||
<li><a name="TOC18" href="#SEC18">LOCALE SUPPORT</a>
|
<li><a name="TOC18" href="#SEC18">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||||
<li><a name="TOC19" href="#SEC19">INFORMATION ABOUT A COMPILED PATTERN</a>
|
<li><a name="TOC19" href="#SEC19">LOCALE SUPPORT</a>
|
||||||
<li><a name="TOC20" href="#SEC20">THE MATCH DATA BLOCK</a>
|
<li><a name="TOC20" href="#SEC20">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||||
<li><a name="TOC21" href="#SEC21">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
<li><a name="TOC21" href="#SEC21">THE MATCH DATA BLOCK</a>
|
||||||
<li><a name="TOC22" href="#SEC22">NEWLINE HANDLING WHEN MATCHING</a>
|
<li><a name="TOC22" href="#SEC22">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||||
<li><a name="TOC23" href="#SEC23">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC23" href="#SEC23">NEWLINE HANDLING WHEN MATCHING</a>
|
||||||
<li><a name="TOC24" href="#SEC24">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
<li><a name="TOC24" href="#SEC24">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC25" href="#SEC25">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC25" href="#SEC25">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||||
<li><a name="TOC26" href="#SEC26">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
<li><a name="TOC26" href="#SEC26">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC27" href="#SEC27">DUPLICATE SUBPATTERN NAMES</a>
|
<li><a name="TOC27" href="#SEC27">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||||
<li><a name="TOC28" href="#SEC28">FINDING ALL POSSIBLE MATCHES</a>
|
<li><a name="TOC28" href="#SEC28">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||||
<li><a name="TOC29" href="#SEC29">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
<li><a name="TOC29" href="#SEC29">DUPLICATE SUBPATTERN NAMES</a>
|
||||||
<li><a name="TOC30" href="#SEC30">SEE ALSO</a>
|
<li><a name="TOC30" href="#SEC30">FINDING ALL POSSIBLE MATCHES</a>
|
||||||
<li><a name="TOC31" href="#SEC31">AUTHOR</a>
|
<li><a name="TOC31" href="#SEC31">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||||
<li><a name="TOC32" href="#SEC32">REVISION</a>
|
<li><a name="TOC32" href="#SEC32">SEE ALSO</a>
|
||||||
|
<li><a name="TOC33" href="#SEC33">AUTHOR</a>
|
||||||
|
<li><a name="TOC34" href="#SEC34">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<P>
|
<P>
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
|
@ -69,7 +71,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_match_data_create_from_pattern(pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -222,7 +224,16 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">PCRE2 NATIVE API JIT FUNCTIONS</a><br>
|
<br><a name="SEC7" href="#TOC1">PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
|
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||||
|
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC8" href="#TOC1">PCRE2 NATIVE API JIT FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -230,7 +241,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>, pcre2_jit_stack *<i>jit_stack</i>);</b>
|
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
|
@ -240,13 +251,13 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_jit_stack_assign(const pcre2_code *<i>code</i>,</b>
|
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
|
<b>void pcre2_jit_stack_free(pcre2_jit_stack *<i>jit_stack</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a><br>
|
<br><a name="SEC9" href="#TOC1">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||||
|
@ -260,7 +271,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<br>
|
<br>
|
||||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC10" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
|
There are three PCRE2 libraries, supporting 8-bit, 16-bit, and 32-bit code
|
||||||
units, respectively. However, there is just one header file, <b>pcre2.h</b>.
|
units, respectively. However, there is just one header file, <b>pcre2.h</b>.
|
||||||
|
@ -321,7 +332,7 @@ In the function summaries above, and in the rest of this document and other
|
||||||
PCRE2 documents, functions and data types are described using their generic
|
PCRE2 documents, functions and data types are described using their generic
|
||||||
names, without the 8, 16, or 32 suffix.
|
names, without the 8, 16, or 32 suffix.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
<br><a name="SEC11" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 has its own native API, which is described in this document. There are
|
PCRE2 has its own native API, which is described in this document. There are
|
||||||
also some wrapper functions for the 8-bit library that correspond to the
|
also some wrapper functions for the 8-bit library that correspond to the
|
||||||
|
@ -386,8 +397,8 @@ documentation. There is no JIT support for <b>pcre2_dfa_match()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In addition to the main compiling and matching functions, there are convenience
|
In addition to the main compiling and matching functions, there are convenience
|
||||||
functions for extracting captured substrings from a subject string that is
|
functions for extracting captured substrings from a subject string that has
|
||||||
matched by <b>pcre2_match()</b>. They are:
|
been matched by <b>pcre2_match()</b>. They are:
|
||||||
<pre>
|
<pre>
|
||||||
<b>pcre2_substring_copy_byname()</b>
|
<b>pcre2_substring_copy_byname()</b>
|
||||||
<b>pcre2_substring_copy_bynumber()</b>
|
<b>pcre2_substring_copy_bynumber()</b>
|
||||||
|
@ -403,11 +414,16 @@ matched by <b>pcre2_match()</b>. They are:
|
||||||
provided, to free the memory used for extracted strings.
|
provided, to free the memory used for extracted strings.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There are functions for finding out information about a compiled pattern
|
The function <b>pcre2_substitute()</b> can be called to match a pattern and
|
||||||
(<b>pcre2_pattern_info()</b>) and about the configuration with which PCRE2 was
|
return a copy of the subject string with substitutions for parts that were
|
||||||
built (<b>pcre2_config()</b>).
|
matched.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Finally, there are functions for finding out information about a compiled
|
||||||
|
pattern (<b>pcre2_pattern_info()</b>) and about the configuration with which
|
||||||
|
PCRE2 was built (<b>pcre2_config()</b>).
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><a name="SEC11" href="#TOC1">NEWLINES</a><br>
|
<br><a name="SEC12" href="#TOC1">NEWLINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 supports five different conventions for indicating line breaks in
|
PCRE2 supports five different conventions for indicating line breaks in
|
||||||
strings: a single CR (carriage return) character, a single LF (linefeed)
|
strings: a single CR (carriage return) character, a single LF (linefeed)
|
||||||
|
@ -446,7 +462,7 @@ The choice of newline convention does not affect the interpretation of
|
||||||
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
||||||
its own separate control.
|
its own separate control.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
|
<br><a name="SEC13" href="#TOC1">MULTITHREADING</a><br>
|
||||||
<P>
|
<P>
|
||||||
In a multithreaded application it is important to keep thread-specific data
|
In a multithreaded application it is important to keep thread-specific data
|
||||||
separate from data that can be shared between threads. The PCRE2 library code
|
separate from data that can be shared between threads. The PCRE2 library code
|
||||||
|
@ -491,7 +507,7 @@ storing the results of a match. This includes details of what was matched, as
|
||||||
well as additional information such as the name of a (*MARK) setting. Each
|
well as additional information such as the name of a (*MARK) setting. Each
|
||||||
thread must provide its own version of this memory.
|
thread must provide its own version of this memory.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
<br><a name="SEC14" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Some PCRE2 functions have a lot of parameters, many of which are used only by
|
Some PCRE2 functions have a lot of parameters, many of which are used only by
|
||||||
specialist applications, for example, those that use custom memory management
|
specialist applications, for example, those that use custom memory management
|
||||||
|
@ -765,7 +781,7 @@ so that they can be re-used when possible during the match. In the absence of
|
||||||
these functions, the normal custom memory management functions are used, if
|
these functions, the normal custom memory management functions are used, if
|
||||||
supplied, otherwise the system functions.
|
supplied, otherwise the system functions.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
<br><a name="SEC15" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -809,8 +825,9 @@ units long. (The exact length needed can be found by calling
|
||||||
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with a
|
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with a
|
||||||
string that contains the name of the architecture for which the JIT compiler is
|
string that contains the name of the architecture for which the JIT compiler is
|
||||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
||||||
is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the length of
|
is not available, PCRE2_ERROR_BADOPTION is returned, otherwise the number of
|
||||||
the string, in code units, is returned.
|
code units used is returned. This is the length of the string, plus one unit
|
||||||
|
for the terminating zero.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_LINKSIZE
|
PCRE2_CONFIG_LINKSIZE
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -878,8 +895,8 @@ units long. (The exact length needed can be found by calling
|
||||||
<b>pcre2_config()</b> with <b>where</b> set to NULL.) If PCRE2 has been compiled
|
<b>pcre2_config()</b> with <b>where</b> set to NULL.) If PCRE2 has been compiled
|
||||||
without Unicode support, the buffer is filled with the text "Unicode not
|
without Unicode support, the buffer is filled with the text "Unicode not
|
||||||
supported". Otherwise, the Unicode version string (for example, "7.0.0") is
|
supported". Otherwise, the Unicode version string (for example, "7.0.0") is
|
||||||
inserted. The string is zero-terminated. The function returns the length of the
|
inserted. The number of code units used is returned. This is the length of the
|
||||||
string in code units.
|
string plus one unit for the terminating zero.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_UNICODE
|
PCRE2_CONFIG_UNICODE
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -891,10 +908,11 @@ otherwise it is set to zero. Unicode support implies UTF support.
|
||||||
The <i>where</i> argument should point to a buffer that is at least 12 code
|
The <i>where</i> argument should point to a buffer that is at least 12 code
|
||||||
units long. (The exact length needed can be found by calling
|
units long. (The exact length needed can be found by calling
|
||||||
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with
|
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with
|
||||||
the PCRE2 version string, zero-terminated. The length of the string (in code
|
the PCRE2 version string, zero-terminated. The number of code units used is
|
||||||
units) is returned.
|
returned. This is the length of the string plus one unit for the terminating
|
||||||
|
zero.
|
||||||
<a name="compiling"></a></P>
|
<a name="compiling"></a></P>
|
||||||
<br><a name="SEC15" href="#TOC1">COMPILING A PATTERN</a><br>
|
<br><a name="SEC16" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
|
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
|
||||||
|
@ -1248,7 +1266,7 @@ of how this option changes the behaviour of PCRE2 are given in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC16" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
<br><a name="SEC17" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||||
<P>
|
<P>
|
||||||
There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
|
There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
|
||||||
it finds an error in the pattern. There are also some negative error codes that
|
it finds an error in the pattern. There are also some negative error codes that
|
||||||
|
@ -1258,7 +1276,7 @@ are used for invalid UTF strings. These are the same as given by
|
||||||
page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
|
page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
|
||||||
textual error message from any error code.
|
textual error message from any error code.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
<br><a name="SEC18" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1266,7 +1284,7 @@ textual error message from any error code.
|
||||||
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_jit_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>, pcre2_jit_stack *<i>jit_stack</i>);</b>
|
<b> pcre2_match_context *<i>mcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
|
@ -1276,7 +1294,7 @@ textual error message from any error code.
|
||||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>void pcre2_jit_stack_assign(const pcre2_code *<i>code</i>,</b>
|
<b>void pcre2_jit_stack_assign(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
<b> pcre2_jit_callback <i>callback_function</i>, void *<i>callback_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1296,7 +1314,7 @@ patterns to be analyzed, and for one-off matches and simple patterns the
|
||||||
benefit of faster execution might be offset by a much slower compilation time.
|
benefit of faster execution might be offset by a much slower compilation time.
|
||||||
Most, but not all patterns can be optimized by the JIT compiler.
|
Most, but not all patterns can be optimized by the JIT compiler.
|
||||||
<a name="localesupport"></a></P>
|
<a name="localesupport"></a></P>
|
||||||
<br><a name="SEC18" href="#TOC1">LOCALE SUPPORT</a><br>
|
<br><a name="SEC19" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 handles caseless matching, and determines whether characters are letters,
|
PCRE2 handles caseless matching, and determines whether characters are letters,
|
||||||
digits, or whatever, by reference to a set of tables, indexed by character code
|
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||||
|
@ -1353,7 +1371,7 @@ is saved with the compiled pattern, and the same tables are used by
|
||||||
compilation, and matching all happen in the same locale, but different patterns
|
compilation, and matching all happen in the same locale, but different patterns
|
||||||
can be processed in different locales.
|
can be processed in different locales.
|
||||||
<a name="infoaboutpattern"></a></P>
|
<a name="infoaboutpattern"></a></P>
|
||||||
<br><a name="SEC19" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
<br><a name="SEC20" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -1640,13 +1658,13 @@ getting memory in which to place the compiled data is the value returned by
|
||||||
this option plus the size of the <b>pcre2_code</b> structure. Processing a
|
this option plus the size of the <b>pcre2_code</b> structure. Processing a
|
||||||
pattern with the JIT compiler does not alter the value returned by this option.
|
pattern with the JIT compiler does not alter the value returned by this option.
|
||||||
<a name="matchdatablock"></a></P>
|
<a name="matchdatablock"></a></P>
|
||||||
<br><a name="SEC20" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
<br><a name="SEC21" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *<i>code</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1690,7 +1708,7 @@ and
|
||||||
<a href="#matchotherdata">other match data</a>
|
<a href="#matchotherdata">other match data</a>
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
<br><a name="SEC22" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -1902,7 +1920,7 @@ examples, in the
|
||||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
<br><a name="SEC23" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||||
standard convention for the operating system. The default can be overridden in
|
standard convention for the operating system. The default can be overridden in
|
||||||
|
@ -1940,7 +1958,7 @@ the characters that it matches).
|
||||||
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
||||||
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||||
<a name="matchedstrings"></a></P>
|
<a name="matchedstrings"></a></P>
|
||||||
<br><a name="SEC23" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC24" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2167,7 +2185,7 @@ time.
|
||||||
</pre>
|
</pre>
|
||||||
The internal recursion limit was reached.
|
The internal recursion limit was reached.
|
||||||
<a name="extractbynumber"></a></P>
|
<a name="extractbynumber"></a></P>
|
||||||
<br><a name="SEC24" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
<br><a name="SEC25" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
<b> unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||||
|
@ -2239,7 +2257,7 @@ no capturing group of that number in the pattern, or because the group with
|
||||||
that number did not participate in the match, or because the ovector was too
|
that number did not participate in the match, or because the ovector was too
|
||||||
small to capture that group.
|
small to capture that group.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC26" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||||
|
@ -2273,7 +2291,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
||||||
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||||
substrings.
|
substrings.
|
||||||
<a name="extractbynname"></a></P>
|
<a name="extractbynname"></a></P>
|
||||||
<br><a name="SEC26" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
<br><a name="SEC27" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||||
|
@ -2326,7 +2344,67 @@ names are not included in the compiled code. The matching process uses only
|
||||||
numbers. For this reason, the use of different names for subpatterns of the
|
numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC28" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
<b> uint32_t <i>options</i>, pcre2_match_data *<i>match_data</i>,</b>
|
||||||
|
<b> pcre2_match_context *<i>mcontext</i>, PCRE2_SPTR <i>replacement</i>,</b>
|
||||||
|
<b> PCRE2_SIZE <i>rlength</i>, PCRE2_UCHAR *<i>outputbuffer</i>,</b>
|
||||||
|
<b> PCRE2_SIZE *<i>outlengthptr</i>);</b>
|
||||||
|
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||||
|
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
||||||
|
<i>replacement</i> string, whose length is supplied in <i>rlength</i>. This can
|
||||||
|
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
In the replacement string, which is interpreted as a UTF string in UTF mode, a
|
||||||
|
dollar character is an escape character that can specify the insertion of
|
||||||
|
characters from capturing groups in the pattern. The following forms are
|
||||||
|
recognized:
|
||||||
|
<pre>
|
||||||
|
$$ insert a dollar character
|
||||||
|
$<n> insert the contents of group <n>
|
||||||
|
${<n>} insert the contents of group <n>
|
||||||
|
</pre>
|
||||||
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
|
required only if the following character would be interpreted as part of the
|
||||||
|
number or name. The number may be zero to include the entire matched string.
|
||||||
|
For example, if the pattern a(b)c is matched with "[abc]" and the replacement
|
||||||
|
string "+$1$0$1+", the result is "[+babcb+]". Group insertion is done by
|
||||||
|
calling <b>pcre2_substring_copy_byname()</b> or <b>pcre2_substring_copy_bynumber()</b> as
|
||||||
|
appropriate.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||||
|
<b>pcre2_match()</b>, except that the partial matching options are not
|
||||||
|
permitted, and <i>match_data</i> may be passed as NULL, in which case a match
|
||||||
|
data block is obtained and freed within this function, using memory management
|
||||||
|
functions from the match context, if provided, or else those that were used to
|
||||||
|
allocate memory for the compiled code.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
||||||
|
function to iterate over the subject string, replacing every matching
|
||||||
|
substring. If this is not set, only the first matching substring is replaced.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <i>outlengthptr</i> argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. It is updated to contain the
|
||||||
|
length of the new string, excluding the trailing zero that is automatically
|
||||||
|
added.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The function returns the number of replacements that were made. This may be
|
||||||
|
zero if no matches were found, and is never greater than 1 unless
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||||
|
is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any
|
||||||
|
errors from <b>pcre2_match()</b> or the substring copying functions are passed
|
||||||
|
straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
||||||
|
replacement string (unrecognized sequence following a dollar sign), and
|
||||||
|
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC29" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||||
|
@ -2369,7 +2447,7 @@ The format of the name table is described above in the section entitled
|
||||||
Given all the relevant entries for the name, you can extract each of their
|
Given all the relevant entries for the name, you can extract each of their
|
||||||
numbers, and hence the captured data.
|
numbers, and hence the captured data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC28" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
|
<br><a name="SEC30" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
|
||||||
<P>
|
<P>
|
||||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||||
when it finds the first match, starting at a given point in the subject. If you
|
when it finds the first match, starting at a given point in the subject. If you
|
||||||
|
@ -2387,7 +2465,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
||||||
other alternatives. Ultimately, when it runs out of matches,
|
other alternatives. Ultimately, when it runs out of matches,
|
||||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||||
<a name="dfamatch"></a></P>
|
<a name="dfamatch"></a></P>
|
||||||
<br><a name="SEC29" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
<br><a name="SEC31" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2562,13 +2640,13 @@ some plausibility checks are made on the contents of the workspace, which
|
||||||
should contain data about the previous partial match. If any of these checks
|
should contain data about the previous partial match. If any of these checks
|
||||||
fail, this error is given.
|
fail, this error is given.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC32" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
||||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||||
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
|
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC33" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -2577,9 +2655,9 @@ University Computing Service
|
||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC34" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 November 2014
|
Last updated: 11 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -178,19 +178,20 @@ pattern.
|
||||||
The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
|
The <b>pcre2_jit_stack_assign()</b> function specifies which stack JIT code
|
||||||
should use. Its arguments are as follows:
|
should use. Its arguments are as follows:
|
||||||
<pre>
|
<pre>
|
||||||
pcre2_code *code
|
pcre2_match_context *mcontext
|
||||||
pcre2_jit_callback callback
|
pcre2_jit_callback callback
|
||||||
void *data
|
void *data
|
||||||
</pre>
|
</pre>
|
||||||
The <i>code</i> argument is a pointer to a compiled pattern, after it has been
|
The first argument is a pointer to a match context. When this is subsequently
|
||||||
processed by <b>pcre2_jit_compile()</b>. There are three cases for the values of
|
passed to a matching function, its information determines which JIT stack is
|
||||||
the other two arguments:
|
used. There are three cases for the values of the other two arguments:
|
||||||
<pre>
|
<pre>
|
||||||
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
|
(1) If <i>callback</i> is NULL and <i>data</i> is NULL, an internal 32K block
|
||||||
on the machine stack is used.
|
on the machine stack is used.
|
||||||
|
|
||||||
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
||||||
a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
a pointer to a valid JIT stack, the result of calling
|
||||||
|
<b>pcre2_jit_stack_create()</b>.
|
||||||
|
|
||||||
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
||||||
called with <i>data</i> as an argument at the start of matching, in
|
called with <i>data</i> as an argument at the start of matching, in
|
||||||
|
@ -215,11 +216,11 @@ each thread so that the application is thread-safe.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
|
Strictly speaking, even more is allowed. You can assign the same non-NULL stack
|
||||||
to any number of patterns as long as they are not used for matching by multiple
|
to a match context that is used by any number of patterns, as long as they are
|
||||||
threads at the same time. For example, you can assign the same stack to all
|
not used for matching by multiple threads at the same time. For example, you
|
||||||
compiled patterns, and use a global mutex in the callback to wait until the
|
could use the same stack in all compiled patterns, with a global mutex in the
|
||||||
stack is available for use. However, this is an inefficient solution, and not
|
callback to wait until the stack is available for use. However, this is an
|
||||||
recommended.
|
inefficient solution, and not recommended.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This is a suggestion for how a multithreaded program that needs to set up
|
This is a suggestion for how a multithreaded program that needs to set up
|
||||||
|
@ -234,10 +235,7 @@ non-default JIT stacks might operate:
|
||||||
Use a one-line callback function
|
Use a one-line callback function
|
||||||
return thread_local_var
|
return thread_local_var
|
||||||
</pre>
|
</pre>
|
||||||
All the functions described in this section do nothing if JIT is not available,
|
All the functions described in this section do nothing if JIT is not available.
|
||||||
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
|
|
||||||
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
|
|
||||||
processed by <b>pcre2_jit_compile()</b>.
|
|
||||||
<a name="stackfaq"></a></P>
|
<a name="stackfaq"></a></P>
|
||||||
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -267,26 +265,26 @@ grow up to 1M anytime if needed.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The owner of the stack is the user program, not the JIT studied pattern or
|
The owner of the stack is the user program, not the JIT studied pattern or
|
||||||
anything else. The user program must ensure that if a stack is used by
|
anything else. The user program must ensure that if a stack is being used by
|
||||||
<b>pcre2_match()</b>, (that is, it is assigned to the pattern currently
|
<b>pcre2_match()</b>, (that is, it is assigned to a match context that is passed
|
||||||
running), that stack must not be used by any other threads (to avoid
|
to the pattern currently running), that stack must not be used by any other
|
||||||
overwriting the same memory area). The best practice for multithreaded programs
|
threads (to avoid overwriting the same memory area). The best practice for
|
||||||
is to allocate a stack for each thread, and return this stack through the JIT
|
multithreaded programs is to allocate a stack for each thread, and return this
|
||||||
callback function.
|
stack through the JIT callback function.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
(4) When should a JIT stack be freed?
|
(4) When should a JIT stack be freed?
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
You can free a JIT stack at any time, as long as it will not be used by
|
You can free a JIT stack at any time, as long as it will not be used by
|
||||||
<b>pcre2_match()</b> again. When you assign the stack to a pattern, only a
|
<b>pcre2_match()</b> again. When you assign the stack to a match context, only a
|
||||||
pointer is set. There is no reference counting or any other magic. You can free
|
pointer is set. There is no reference counting or any other magic. You can free
|
||||||
the patterns and stacks in any order, anytime. Just <i>do not</i> call
|
compiled patterns, contexts, and stacks in any order, anytime. Just <i>do
|
||||||
<b>pcre2_match()</b> with a pattern pointing to an already freed stack, as that
|
not</i> call <b>pcre2_match()</b> with a match context pointing to an already
|
||||||
will cause SEGFAULT. (Also, do not free a stack currently used by
|
freed stack, as that will cause SEGFAULT. (Also, do not free a stack currently
|
||||||
<b>pcre2_match()</b> in another thread). You can also replace the stack for a
|
used by <b>pcre2_match()</b> in another thread). You can also replace the stack
|
||||||
pattern at any time. You can even free the previous stack before assigning a
|
in a context at any time when it is not in use. You can also free the previous
|
||||||
replacement.
|
stack before assigning a replacement.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
(5) Should I allocate/free a stack every time before/after calling
|
(5) Should I allocate/free a stack every time before/after calling
|
||||||
|
@ -296,7 +294,7 @@ replacement.
|
||||||
No, because this is too costly in terms of resources. However, you could
|
No, because this is too costly in terms of resources. However, you could
|
||||||
implement some clever idea which releases the stack if it is not used in let's
|
implement some clever idea which releases the stack if it is not used in let's
|
||||||
say two minutes. The JIT callback can help to achieve this without keeping a
|
say two minutes. The JIT callback can help to achieve this without keeping a
|
||||||
list of the currently JIT studied patterns.
|
list of patterns.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
(6) OK, the stack is for long term memory allocation. But what happens if a
|
(6) OK, the stack is for long term memory allocation. But what happens if a
|
||||||
|
@ -333,25 +331,28 @@ memory management, or NULL for standard memory management.
|
||||||
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
|
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
This is a single-threaded example that specifies a JIT stack without using a
|
This is a single-threaded example that specifies a JIT stack without using a
|
||||||
callback.
|
callback. A real program should include error checking after all the function
|
||||||
|
calls.
|
||||||
<pre>
|
<pre>
|
||||||
int rc;
|
int rc;
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
pcre2_match_data *match_data;
|
pcre2_match_data *match_data;
|
||||||
|
pcre2_match_context *mcontext;
|
||||||
pcre2_jit_stack *jit_stack;
|
pcre2_jit_stack *jit_stack;
|
||||||
|
|
||||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||||
&errornumber, &erroffset, NULL);
|
&errornumber, &erroffset, NULL);
|
||||||
/* Check for errors */
|
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
mcontext = pcre2_match_context_create(NULL);
|
||||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
|
||||||
match_data = pcre2_match_data_create(10, NULL);
|
match_data = pcre2_match_data_create(10, NULL);
|
||||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
|
||||||
/* Check results */
|
/* Process result */
|
||||||
pcre2_free(re);
|
|
||||||
|
pcre2_code_free(re);
|
||||||
|
pcre2_match_data_free(match_data);
|
||||||
|
pcre2_match_context_free(mcontext);
|
||||||
pcre2_jit_stack_free(jit_stack);
|
pcre2_jit_stack_free(jit_stack);
|
||||||
|
|
||||||
</PRE>
|
</PRE>
|
||||||
|
@ -369,13 +370,10 @@ processed by <b>pcre2_jit_compile()</b>).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
||||||
the same arguments as <b>pcre2_match()</b>, plus one additional argument that
|
the same arguments as <b>pcre2_match()</b>. The return values are also the same,
|
||||||
must either point to a JIT stack or be NULL. In the latter case, if a callback
|
plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
||||||
function has been set up by <b>pcre2_jit_stack_assign()</b>, it is called.
|
requested that was not compiled. Unsupported option bits (for example,
|
||||||
Otherwise the system stack is used. The return values are the same as for
|
PCRE2_ANCHORED) are ignored.
|
||||||
<b>pcre2_match()</b>, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
|
||||||
or complete) is requested that was not compiled. Unsupported option bits (for
|
|
||||||
example, PCRE2_ANCHORED) are ignored.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
||||||
|
@ -404,7 +402,7 @@ Cambridge CB2 3QH, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 November 2014
|
Last updated: 08 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -488,7 +488,6 @@ about the pattern:
|
||||||
posix use the POSIX API
|
posix use the POSIX API
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
use_length use the pattern's length
|
|
||||||
</pre>
|
</pre>
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
@ -542,15 +541,10 @@ pairs. For example:
|
||||||
/ab 32 59/hex
|
/ab 32 59/hex
|
||||||
</pre>
|
</pre>
|
||||||
This feature is provided as a way of creating patterns that contain binary zero
|
This feature is provided as a way of creating patterns that contain binary zero
|
||||||
characters. When <b>hex</b> is set, it implies <b>use_length</b>.
|
characters. By default, <b>pcre2test</b> passes patterns as zero-terminated
|
||||||
</P>
|
strings to <b>pcre2_compile()</b>, giving the length as PCRE2_ZERO_TERMINATED.
|
||||||
<br><b>
|
However, for patterns specified in hexadecimal, the length of the pattern is
|
||||||
Using the pattern's length
|
passed.
|
||||||
</b><br>
|
|
||||||
<P>
|
|
||||||
By default, <b>pcre2test</b> passes patterns as zero-terminated strings to
|
|
||||||
<b>pcre2_compile()</b>, giving the length as -1. If <b>use_length</b> is set, the
|
|
||||||
length of the pattern is passed. This is implied if <b>hex</b> is set.
|
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
JIT compilation
|
JIT compilation
|
||||||
|
@ -766,6 +760,7 @@ pattern.
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
|
zero_terminate pass the subject as zero-terminated
|
||||||
</pre>
|
</pre>
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
@ -979,6 +974,20 @@ match block of exactly the right size for the pattern. (It is not possible to
|
||||||
create a match block with a zero-length ovector; there is always one pair of
|
create a match block with a zero-length ovector; there is always one pair of
|
||||||
offsets.)
|
offsets.)
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
Passing the subject as zero-terminated
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
By default, the subject string is passed to a native API matching function with
|
||||||
|
its correct length. In order to test the facility for passing a zero-terminated
|
||||||
|
string, the <b>zero_terminate</b> modifier is provided. It causes the length to
|
||||||
|
be passed as PCRE2_ZERO_TERMINATED. (When matching via the POSIX interface,
|
||||||
|
this modifier has no effect, as there is no facility for passing a length.)
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When testing <b>pcre2_substitute()</b>, this modifier also has the effect of
|
||||||
|
passing the replacement string as zero-terminated.
|
||||||
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
|
By default, <b>pcre2test</b> uses the standard PCRE2 matching function,
|
||||||
|
@ -1224,7 +1233,7 @@ Cambridge CB2 3QH, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 02 November 2014
|
Last updated: 09 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -204,6 +204,10 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
<tr><td><a href="pcre2_set_recursion_memory_management.html">pcre2_set_recursion_memory_management</a></td>
|
||||||
<td> Set match recursion memory management</td></tr>
|
<td> Set match recursion memory management</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_substitute.html">pcre2_substitute</a></td>
|
||||||
|
<td> Match a compiled pattern to a subject string and do
|
||||||
|
substitutions</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
|
<tr><td><a href="pcre2_substring_copy_byname.html">pcre2_substring_copy_byname</a></td>
|
||||||
<td> Extract named substring into given buffer</td></tr>
|
<td> Extract named substring into given buffer</td></tr>
|
||||||
|
|
||||||
|
|
459
doc/pcre2.txt
459
doc/pcre2.txt
|
@ -186,7 +186,7 @@ PCRE2 NATIVE API BASIC FUNCTIONS
|
||||||
pcre2_match_data_create(uint32_t ovecsize,
|
pcre2_match_data_create(uint32_t ovecsize,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
|
@ -314,6 +314,16 @@ PCRE2 NATIVE API STRING EXTRACTION FUNCTIONS
|
||||||
PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
|
PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr);
|
||||||
|
|
||||||
|
|
||||||
|
PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION
|
||||||
|
|
||||||
|
int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
|
uint32_t options, pcre2_match_data *match_data,
|
||||||
|
pcre2_match_context *mcontext, PCRE2_SPTR replacement,
|
||||||
|
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
|
||||||
|
PCRE2_SIZE *outlengthptr);
|
||||||
|
|
||||||
|
|
||||||
PCRE2 NATIVE API JIT FUNCTIONS
|
PCRE2 NATIVE API JIT FUNCTIONS
|
||||||
|
|
||||||
int pcre2_jit_compile(pcre2_code *code, uint32_t options);
|
int pcre2_jit_compile(pcre2_code *code, uint32_t options);
|
||||||
|
@ -321,14 +331,14 @@ PCRE2 NATIVE API JIT FUNCTIONS
|
||||||
int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
|
int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
uint32_t options, pcre2_match_data *match_data,
|
uint32_t options, pcre2_match_data *match_data,
|
||||||
pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack);
|
pcre2_match_context *mcontext);
|
||||||
|
|
||||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||||
|
|
||||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
void pcre2_jit_stack_assign(pcre2_match_context *mcontext,
|
||||||
pcre2_jit_callback callback_function, void *callback_data);
|
pcre2_jit_callback callback_function, void *callback_data);
|
||||||
|
|
||||||
void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
|
void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
|
||||||
|
@ -459,7 +469,7 @@ PCRE2 API OVERVIEW
|
||||||
|
|
||||||
In addition to the main compiling and matching functions, there are
|
In addition to the main compiling and matching functions, there are
|
||||||
convenience functions for extracting captured substrings from a subject
|
convenience functions for extracting captured substrings from a subject
|
||||||
string that is matched by pcre2_match(). They are:
|
string that has been matched by pcre2_match(). They are:
|
||||||
|
|
||||||
pcre2_substring_copy_byname()
|
pcre2_substring_copy_byname()
|
||||||
pcre2_substring_copy_bynumber()
|
pcre2_substring_copy_bynumber()
|
||||||
|
@ -474,9 +484,13 @@ PCRE2 API OVERVIEW
|
||||||
pcre2_substring_free() and pcre2_substring_list_free() are also pro-
|
pcre2_substring_free() and pcre2_substring_list_free() are also pro-
|
||||||
vided, to free the memory used for extracted strings.
|
vided, to free the memory used for extracted strings.
|
||||||
|
|
||||||
There are functions for finding out information about a compiled pat-
|
The function pcre2_substitute() can be called to match a pattern and
|
||||||
tern (pcre2_pattern_info()) and about the configuration with which
|
return a copy of the subject string with substitutions for parts that
|
||||||
PCRE2 was built (pcre2_config()).
|
were matched.
|
||||||
|
|
||||||
|
Finally, there are functions for finding out information about a com-
|
||||||
|
piled pattern (pcre2_pattern_info()) and about the configuration with
|
||||||
|
which PCRE2 was built (pcre2_config()).
|
||||||
|
|
||||||
|
|
||||||
NEWLINES
|
NEWLINES
|
||||||
|
@ -862,33 +876,34 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
contains the name of the architecture for which the JIT compiler is
|
contains the name of the architecture for which the JIT compiler is
|
||||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT
|
configured, for example "x86 32bit (little endian + unaligned)". If JIT
|
||||||
support is not available, PCRE2_ERROR_BADOPTION is returned, otherwise
|
support is not available, PCRE2_ERROR_BADOPTION is returned, otherwise
|
||||||
the length of the string, in code units, is returned.
|
the number of code units used is returned. This is the length of the
|
||||||
|
string, plus one unit for the terminating zero.
|
||||||
|
|
||||||
PCRE2_CONFIG_LINKSIZE
|
PCRE2_CONFIG_LINKSIZE
|
||||||
|
|
||||||
The output is an integer that contains the number of bytes used for
|
The output is an integer that contains the number of bytes used for
|
||||||
internal linkage in compiled regular expressions. When PCRE2 is config-
|
internal linkage in compiled regular expressions. When PCRE2 is config-
|
||||||
ured, the value can be set to 2, 3, or 4, with the default being 2.
|
ured, the value can be set to 2, 3, or 4, with the default being 2.
|
||||||
This is the value that is returned by pcre2_config(). However, when the
|
This is the value that is returned by pcre2_config(). However, when the
|
||||||
16-bit library is compiled, a value of 3 is rounded up to 4, and when
|
16-bit library is compiled, a value of 3 is rounded up to 4, and when
|
||||||
the 32-bit library is compiled, internal linkages always use 4 bytes,
|
the 32-bit library is compiled, internal linkages always use 4 bytes,
|
||||||
so the configured value is not relevant.
|
so the configured value is not relevant.
|
||||||
|
|
||||||
The default value of 2 for the 8-bit and 16-bit libraries is sufficient
|
The default value of 2 for the 8-bit and 16-bit libraries is sufficient
|
||||||
for all but the most massive patterns, since it allows the size of the
|
for all but the most massive patterns, since it allows the size of the
|
||||||
compiled pattern to be up to 64K code units. Larger values allow larger
|
compiled pattern to be up to 64K code units. Larger values allow larger
|
||||||
regular expressions to be compiled by those two libraries, but at the
|
regular expressions to be compiled by those two libraries, but at the
|
||||||
expense of slower matching.
|
expense of slower matching.
|
||||||
|
|
||||||
PCRE2_CONFIG_MATCHLIMIT
|
PCRE2_CONFIG_MATCHLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the default limit for
|
The output is an unsigned long integer that gives the default limit for
|
||||||
the number of internal matching function calls in a pcre2_match() exe-
|
the number of internal matching function calls in a pcre2_match() exe-
|
||||||
cution. Further details are given with pcre2_match() below.
|
cution. Further details are given with pcre2_match() below.
|
||||||
|
|
||||||
PCRE2_CONFIG_NEWLINE
|
PCRE2_CONFIG_NEWLINE
|
||||||
|
|
||||||
The output is an integer whose value specifies the default character
|
The output is an integer whose value specifies the default character
|
||||||
sequence that is recognized as meaning "newline". The values are:
|
sequence that is recognized as meaning "newline". The values are:
|
||||||
|
|
||||||
1 Carriage return (CR)
|
1 Carriage return (CR)
|
||||||
|
@ -897,57 +912,58 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
4 Any Unicode line ending
|
4 Any Unicode line ending
|
||||||
5 Any of CR, LF, or CRLF
|
5 Any of CR, LF, or CRLF
|
||||||
|
|
||||||
The default should normally correspond to the standard sequence for
|
The default should normally correspond to the standard sequence for
|
||||||
your operating system.
|
your operating system.
|
||||||
|
|
||||||
PCRE2_CONFIG_PARENSLIMIT
|
PCRE2_CONFIG_PARENSLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the maximum depth of
|
The output is an unsigned long integer that gives the maximum depth of
|
||||||
nesting of parentheses (of any kind) in a pattern. This limit is
|
nesting of parentheses (of any kind) in a pattern. This limit is
|
||||||
imposed to cap the amount of system stack used when a pattern is com-
|
imposed to cap the amount of system stack used when a pattern is com-
|
||||||
piled. It is specified when PCRE2 is built; the default is 250. This
|
piled. It is specified when PCRE2 is built; the default is 250. This
|
||||||
limit does not take into account the stack that may already be used by
|
limit does not take into account the stack that may already be used by
|
||||||
the calling application. For finer control over compilation stack
|
the calling application. For finer control over compilation stack
|
||||||
usage, see pcre2_set_compile_recursion_guard().
|
usage, see pcre2_set_compile_recursion_guard().
|
||||||
|
|
||||||
PCRE2_CONFIG_RECURSIONLIMIT
|
PCRE2_CONFIG_RECURSIONLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the default limit for
|
The output is an unsigned long integer that gives the default limit for
|
||||||
the depth of recursion when calling the internal matching function in a
|
the depth of recursion when calling the internal matching function in a
|
||||||
pcre2_match() execution. Further details are given with pcre2_match()
|
pcre2_match() execution. Further details are given with pcre2_match()
|
||||||
below.
|
below.
|
||||||
|
|
||||||
PCRE2_CONFIG_STACKRECURSE
|
PCRE2_CONFIG_STACKRECURSE
|
||||||
|
|
||||||
The output is an integer that is set to one if internal recursion when
|
The output is an integer that is set to one if internal recursion when
|
||||||
running pcre2_match() is implemented by recursive function calls that
|
running pcre2_match() is implemented by recursive function calls that
|
||||||
use the system stack to remember their state. This is the usual way
|
use the system stack to remember their state. This is the usual way
|
||||||
that PCRE2 is compiled. The output is zero if PCRE2 was compiled to use
|
that PCRE2 is compiled. The output is zero if PCRE2 was compiled to use
|
||||||
blocks of data on the heap instead of recursive function calls.
|
blocks of data on the heap instead of recursive function calls.
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
|
|
||||||
The where argument should point to a buffer that is at least 24 code
|
The where argument should point to a buffer that is at least 24 code
|
||||||
units long. (The exact length needed can be found by calling pcre2_con-
|
units long. (The exact length needed can be found by calling pcre2_con-
|
||||||
fig() with where set to NULL.) If PCRE2 has been compiled without Uni-
|
fig() with where set to NULL.) If PCRE2 has been compiled without Uni-
|
||||||
code support, the buffer is filled with the text "Unicode not sup-
|
code support, the buffer is filled with the text "Unicode not sup-
|
||||||
ported". Otherwise, the Unicode version string (for example, "7.0.0")
|
ported". Otherwise, the Unicode version string (for example, "7.0.0")
|
||||||
is inserted. The string is zero-terminated. The function returns the
|
is inserted. The number of code units used is returned. This is the
|
||||||
length of the string in code units.
|
length of the string plus one unit for the terminating zero.
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE
|
PCRE2_CONFIG_UNICODE
|
||||||
|
|
||||||
The output is an integer that is set to one if Unicode support is
|
The output is an integer that is set to one if Unicode support is
|
||||||
available; otherwise it is set to zero. Unicode support implies UTF
|
available; otherwise it is set to zero. Unicode support implies UTF
|
||||||
support.
|
support.
|
||||||
|
|
||||||
PCRE2_CONFIG_VERSION
|
PCRE2_CONFIG_VERSION
|
||||||
|
|
||||||
The where argument should point to a buffer that is at least 12 code
|
The where argument should point to a buffer that is at least 12 code
|
||||||
units long. (The exact length needed can be found by calling pcre2_con-
|
units long. (The exact length needed can be found by calling pcre2_con-
|
||||||
fig() with where set to NULL.) The buffer is filled with the PCRE2 ver-
|
fig() with where set to NULL.) The buffer is filled with the PCRE2 ver-
|
||||||
sion string, zero-terminated. The length of the string (in code units)
|
sion string, zero-terminated. The number of code units used is
|
||||||
is returned.
|
returned. This is the length of the string plus one unit for the termi-
|
||||||
|
nating zero.
|
||||||
|
|
||||||
|
|
||||||
COMPILING A PATTERN
|
COMPILING A PATTERN
|
||||||
|
@ -1300,14 +1316,14 @@ JUST-IN-TIME (JIT) COMPILATION
|
||||||
int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
|
int pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
uint32_t options, pcre2_match_data *match_data,
|
uint32_t options, pcre2_match_data *match_data,
|
||||||
pcre2_match_context *mcontext, pcre2_jit_stack *jit_stack);
|
pcre2_match_context *mcontext);
|
||||||
|
|
||||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||||
|
|
||||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
void pcre2_jit_stack_assign(pcre2_match_context *mcontext,
|
||||||
pcre2_jit_callback callback_function, void *callback_data);
|
pcre2_jit_callback callback_function, void *callback_data);
|
||||||
|
|
||||||
void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
|
void pcre2_jit_stack_free(pcre2_jit_stack *jit_stack);
|
||||||
|
@ -1674,7 +1690,7 @@ THE MATCH DATA BLOCK
|
||||||
pcre2_match_data_create(uint32_t ovecsize,
|
pcre2_match_data_create(uint32_t ovecsize,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
pcre2_match_data_create_from_pattern(const pcre2_code *code,
|
||||||
pcre2_general_context *gcontext);
|
pcre2_general_context *gcontext);
|
||||||
|
|
||||||
void pcre2_match_data_free(pcre2_match_data *match_data);
|
void pcre2_match_data_free(pcre2_match_data *match_data);
|
||||||
|
@ -2290,59 +2306,117 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
|
||||||
causes an error at compile time.
|
causes an error at compile time.
|
||||||
|
|
||||||
|
|
||||||
|
CREATING A NEW STRING WITH SUBSTITUTIONS
|
||||||
|
|
||||||
|
int pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject,
|
||||||
|
PCRE2_SIZE length, PCRE2_SIZE startoffset,
|
||||||
|
uint32_t options, pcre2_match_data *match_data,
|
||||||
|
pcre2_match_context *mcontext, PCRE2_SPTR replacement,
|
||||||
|
PCRE2_SIZE rlength, PCRE2_UCHAR *outputbuffer,
|
||||||
|
PCRE2_SIZE *outlengthptr);
|
||||||
|
This function calls pcre2_match() and then makes a copy of the subject
|
||||||
|
string in outputbuffer, replacing the part that was matched with the
|
||||||
|
replacement string, whose length is supplied in rlength. This can be
|
||||||
|
given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
|
|
||||||
|
In the replacement string, which is interpreted as a UTF string in UTF
|
||||||
|
mode, a dollar character is an escape character that can specify the
|
||||||
|
insertion of characters from capturing groups in the pattern. The fol-
|
||||||
|
lowing forms are recognized:
|
||||||
|
|
||||||
|
$$ insert a dollar character
|
||||||
|
$<n> insert the contents of group <n>
|
||||||
|
${<n>} insert the contents of group <n>
|
||||||
|
|
||||||
|
Either a group number or a group name can be given for <n>. Curly
|
||||||
|
brackets are required only if the following character would be inter-
|
||||||
|
preted as part of the number or name. The number may be zero to include
|
||||||
|
the entire matched string. For example, if the pattern a(b)c is
|
||||||
|
matched with "[abc]" and the replacement string "+$1$0$1+", the result
|
||||||
|
is "[+babcb+]". Group insertion is done by calling pcre2_substring_copy_byname()
|
||||||
|
or pcre2_substring_copy_bynumber() as appropriate.
|
||||||
|
|
||||||
|
The first seven arguments of pcre2_substitute() are the same as for
|
||||||
|
pcre2_match(), except that the partial matching options are not permit-
|
||||||
|
ted, and match_data may be passed as NULL, in which case a match data
|
||||||
|
block is obtained and freed within this function, using memory manage-
|
||||||
|
ment functions from the match context, if provided, or else those that
|
||||||
|
were used to allocate memory for the compiled code.
|
||||||
|
|
||||||
|
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes
|
||||||
|
the function to iterate over the subject string, replacing every match-
|
||||||
|
ing substring. If this is not set, only the first matching substring is
|
||||||
|
replaced.
|
||||||
|
|
||||||
|
The outlengthptr argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. It is updated to contain
|
||||||
|
the length of the new string, excluding the trailing zero that is auto-
|
||||||
|
matically added.
|
||||||
|
|
||||||
|
The function returns the number of replacements that were made. This
|
||||||
|
may be zero if no matches were found, and is never greater than 1
|
||||||
|
unless PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a neg-
|
||||||
|
ative error code is returned. Except for PCRE2_ERROR_NOMATCH (which is
|
||||||
|
never returned), any errors from pcre2_match() or the substring copying
|
||||||
|
functions are passed straight back. PCRE2_ERROR_BADREPLACEMENT is
|
||||||
|
returned for an invalid replacement string (unrecognized sequence fol-
|
||||||
|
lowing a dollar sign), and PCRE2_ERROR_NOMEMORY is returned if the out-
|
||||||
|
put buffer is not big enough.
|
||||||
|
|
||||||
|
|
||||||
DUPLICATE SUBPATTERN NAMES
|
DUPLICATE SUBPATTERN NAMES
|
||||||
|
|
||||||
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
||||||
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
||||||
|
|
||||||
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
||||||
subpatterns are not required to be unique. Duplicate names are always
|
subpatterns are not required to be unique. Duplicate names are always
|
||||||
allowed for subpatterns with the same number, created by using the (?|
|
allowed for subpatterns with the same number, created by using the (?|
|
||||||
feature. Indeed, if such subpatterns are named, they are required to
|
feature. Indeed, if such subpatterns are named, they are required to
|
||||||
use the same names.
|
use the same names.
|
||||||
|
|
||||||
Normally, patterns with duplicate names are such that in any one match,
|
Normally, patterns with duplicate names are such that in any one match,
|
||||||
only one of the named subpatterns participates. An example is shown in
|
only one of the named subpatterns participates. An example is shown in
|
||||||
the pcre2pattern documentation.
|
the pcre2pattern documentation.
|
||||||
|
|
||||||
When duplicates are present, pcre2_substring_copy_byname() and
|
When duplicates are present, pcre2_substring_copy_byname() and
|
||||||
pcre2_substring_get_byname() return the first substring corresponding
|
pcre2_substring_get_byname() return the first substring corresponding
|
||||||
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
||||||
is returned. The pcre2_substring_number_from_name() function returns
|
is returned. The pcre2_substring_number_from_name() function returns
|
||||||
one of the numbers that are associated with the name, but it is not
|
one of the numbers that are associated with the name, but it is not
|
||||||
defined which it is.
|
defined which it is.
|
||||||
|
|
||||||
If you want to get full details of all captured substrings for a given
|
If you want to get full details of all captured substrings for a given
|
||||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||||
first argument is the compiled pattern, and the second is the name. If
|
first argument is the compiled pattern, and the second is the name. If
|
||||||
the third and fourth arguments are NULL, the function returns a group
|
the third and fourth arguments are NULL, the function returns a group
|
||||||
number (it is not defined which). Otherwise, the third and fourth argu-
|
number (it is not defined which). Otherwise, the third and fourth argu-
|
||||||
ments must be pointers to variables that are updated by the function.
|
ments must be pointers to variables that are updated by the function.
|
||||||
After it has run, they point to the first and last entries in the name-
|
After it has run, they point to the first and last entries in the name-
|
||||||
to-number table for the given name, and the function returns the length
|
to-number table for the given name, and the function returns the length
|
||||||
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
||||||
there are no entries for the given name.
|
there are no entries for the given name.
|
||||||
|
|
||||||
The format of the name table is described above in the section entitled
|
The format of the name table is described above in the section entitled
|
||||||
Information about a pattern. Given all the relevant entries for
|
Information about a pattern. Given all the relevant entries for
|
||||||
the name, you can extract each of their numbers, and hence the captured
|
the name, you can extract each of their numbers, and hence the captured
|
||||||
data.
|
data.
|
||||||
|
|
||||||
|
|
||||||
FINDING ALL POSSIBLE MATCHES
|
FINDING ALL POSSIBLE MATCHES
|
||||||
|
|
||||||
The traditional matching function uses a similar algorithm to Perl,
|
The traditional matching function uses a similar algorithm to Perl,
|
||||||
which stops when it finds the first match, starting at a given point in
|
which stops when it finds the first match, starting at a given point in
|
||||||
the subject. If you want to find all possible matches, or the longest
|
the subject. If you want to find all possible matches, or the longest
|
||||||
possible match at a given position, consider using the alternative
|
possible match at a given position, consider using the alternative
|
||||||
matching function (see below) instead. If you cannot use the alterna-
|
matching function (see below) instead. If you cannot use the alterna-
|
||||||
tive function, you can kludge it up by making use of the callout facil-
|
tive function, you can kludge it up by making use of the callout facil-
|
||||||
ity, which is described in the pcre2callout documentation.
|
ity, which is described in the pcre2callout documentation.
|
||||||
|
|
||||||
What you have to do is to insert a callout right at the end of the pat-
|
What you have to do is to insert a callout right at the end of the pat-
|
||||||
tern. When your callout function is called, extract and save the cur-
|
tern. When your callout function is called, extract and save the cur-
|
||||||
rent matched substring. Then return 1, which forces pcre2_match() to
|
rent matched substring. Then return 1, which forces pcre2_match() to
|
||||||
backtrack and try other alternatives. Ultimately, when it runs out of
|
backtrack and try other alternatives. Ultimately, when it runs out of
|
||||||
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
||||||
|
|
||||||
|
|
||||||
|
@ -2354,26 +2428,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
pcre2_match_context *mcontext,
|
pcre2_match_context *mcontext,
|
||||||
int *workspace, PCRE2_SIZE wscount);
|
int *workspace, PCRE2_SIZE wscount);
|
||||||
|
|
||||||
The function pcre2_dfa_match() is called to match a subject string
|
The function pcre2_dfa_match() is called to match a subject string
|
||||||
against a compiled pattern, using a matching algorithm that scans the
|
against a compiled pattern, using a matching algorithm that scans the
|
||||||
subject string just once, and does not backtrack. This has different
|
subject string just once, and does not backtrack. This has different
|
||||||
characteristics to the normal algorithm, and is not compatible with
|
characteristics to the normal algorithm, and is not compatible with
|
||||||
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
||||||
theless, there are times when this kind of matching can be useful. For
|
theless, there are times when this kind of matching can be useful. For
|
||||||
a discussion of the two matching algorithms, and a list of features
|
a discussion of the two matching algorithms, and a list of features
|
||||||
that pcre2_dfa_match() does not support, see the pcre2matching documen-
|
that pcre2_dfa_match() does not support, see the pcre2matching documen-
|
||||||
tation.
|
tation.
|
||||||
|
|
||||||
The arguments for the pcre2_dfa_match() function are the same as for
|
The arguments for the pcre2_dfa_match() function are the same as for
|
||||||
pcre2_match(), plus two extras. The ovector within the match data block
|
pcre2_match(), plus two extras. The ovector within the match data block
|
||||||
is used in a different way, and this is described below. The other com-
|
is used in a different way, and this is described below. The other com-
|
||||||
mon arguments are used in the same way as for pcre2_match(), so their
|
mon arguments are used in the same way as for pcre2_match(), so their
|
||||||
description is not repeated here.
|
description is not repeated here.
|
||||||
|
|
||||||
The two additional arguments provide workspace for the function. The
|
The two additional arguments provide workspace for the function. The
|
||||||
workspace vector should contain at least 20 elements. It is used for
|
workspace vector should contain at least 20 elements. It is used for
|
||||||
keeping track of multiple paths through the pattern tree. More
|
keeping track of multiple paths through the pattern tree. More
|
||||||
workspace is needed for patterns and subjects where there are a lot of
|
workspace is needed for patterns and subjects where there are a lot of
|
||||||
potential matches.
|
potential matches.
|
||||||
|
|
||||||
Here is an example of a simple call to pcre2_dfa_match():
|
Here is an example of a simple call to pcre2_dfa_match():
|
||||||
|
@ -2393,45 +2467,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
Option bits for pcre2_dfa_match()
|
Option bits for pcre2_dfa_match()
|
||||||
|
|
||||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
||||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
||||||
these are exactly the same as for pcre2_match(), so their description
|
these are exactly the same as for pcre2_match(), so their description
|
||||||
is not repeated here.
|
is not repeated here.
|
||||||
|
|
||||||
PCRE2_PARTIAL_HARD
|
PCRE2_PARTIAL_HARD
|
||||||
PCRE2_PARTIAL_SOFT
|
PCRE2_PARTIAL_SOFT
|
||||||
|
|
||||||
These have the same general effect as they do for pcre2_match(), but
|
These have the same general effect as they do for pcre2_match(), but
|
||||||
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
||||||
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
||||||
subject is reached and there is still at least one matching possibility
|
subject is reached and there is still at least one matching possibility
|
||||||
that requires additional characters. This happens even if some complete
|
that requires additional characters. This happens even if some complete
|
||||||
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
||||||
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
||||||
if the end of the subject is reached, there have been no complete
|
if the end of the subject is reached, there have been no complete
|
||||||
matches, but there is still at least one matching possibility. The por-
|
matches, but there is still at least one matching possibility. The por-
|
||||||
tion of the string that was inspected when the longest partial match
|
tion of the string that was inspected when the longest partial match
|
||||||
was found is set as the first matching string in both cases. There is a
|
was found is set as the first matching string in both cases. There is a
|
||||||
more detailed discussion of partial and multi-segment matching, with
|
more detailed discussion of partial and multi-segment matching, with
|
||||||
examples, in the pcre2partial documentation.
|
examples, in the pcre2partial documentation.
|
||||||
|
|
||||||
PCRE2_DFA_SHORTEST
|
PCRE2_DFA_SHORTEST
|
||||||
|
|
||||||
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
||||||
stop as soon as it has found one match. Because of the way the alterna-
|
stop as soon as it has found one match. Because of the way the alterna-
|
||||||
tive algorithm works, this is necessarily the shortest possible match
|
tive algorithm works, this is necessarily the shortest possible match
|
||||||
at the first possible matching point in the subject string.
|
at the first possible matching point in the subject string.
|
||||||
|
|
||||||
PCRE2_DFA_RESTART
|
PCRE2_DFA_RESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() returns a partial match, it is possible to call
|
When pcre2_dfa_match() returns a partial match, it is possible to call
|
||||||
it again, with additional subject characters, and have it continue with
|
it again, with additional subject characters, and have it continue with
|
||||||
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
||||||
it is set, the workspace and wscount arguments must reference the same
|
it is set, the workspace and wscount arguments must reference the same
|
||||||
vector as before because data about the match so far is left in them
|
vector as before because data about the match so far is left in them
|
||||||
after a partial match. There is more discussion of this facility in the
|
after a partial match. There is more discussion of this facility in the
|
||||||
pcre2partial documentation.
|
pcre2partial documentation.
|
||||||
|
|
||||||
|
@ -2439,8 +2513,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
||||||
string in the subject. Note, however, that all the matches from one run
|
string in the subject. Note, however, that all the matches from one run
|
||||||
of the function start at the same point in the subject. The shorter
|
of the function start at the same point in the subject. The shorter
|
||||||
matches are all initial substrings of the longer matches. For example,
|
matches are all initial substrings of the longer matches. For example,
|
||||||
if the pattern
|
if the pattern
|
||||||
|
|
||||||
<.*>
|
<.*>
|
||||||
|
@ -2455,66 +2529,66 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
<something> <something else>
|
<something> <something else>
|
||||||
<something> <something else> <something further>
|
<something> <something else> <something further>
|
||||||
|
|
||||||
On success, the yield of the function is a number greater than zero,
|
On success, the yield of the function is a number greater than zero,
|
||||||
which is the number of matched substrings. The offsets of the sub-
|
which is the number of matched substrings. The offsets of the sub-
|
||||||
strings are returned in the ovector, and can be extracted in the same
|
strings are returned in the ovector, and can be extracted in the same
|
||||||
way as for pcre2_match(). They are returned in reverse order of
|
way as for pcre2_match(). They are returned in reverse order of
|
||||||
length; that is, the longest matching string is given first. If there
|
length; that is, the longest matching string is given first. If there
|
||||||
were too many matches to fit into the ovector, the yield of the func-
|
were too many matches to fit into the ovector, the yield of the func-
|
||||||
tion is zero, and the vector is filled with the longest matches.
|
tion is zero, and the vector is filled with the longest matches.
|
||||||
|
|
||||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||||
character repeats at the end of a pattern (as well as internally). For
|
character repeats at the end of a pattern (as well as internally). For
|
||||||
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
||||||
there is no point in backtracking into the repeated digits. For DFA
|
there is no point in backtracking into the repeated digits. For DFA
|
||||||
matching, this means that only one possible match is found. If you
|
matching, this means that only one possible match is found. If you
|
||||||
really do want multiple matches in such cases, either use an ungreedy
|
really do want multiple matches in such cases, either use an ungreedy
|
||||||
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
||||||
ing.
|
ing.
|
||||||
|
|
||||||
Error returns from pcre2_dfa_match()
|
Error returns from pcre2_dfa_match()
|
||||||
|
|
||||||
The pcre2_dfa_match() function returns a negative number when it fails.
|
The pcre2_dfa_match() function returns a negative number when it fails.
|
||||||
Many of the errors are the same as for pcre2_match(), as described
|
Many of the errors are the same as for pcre2_match(), as described
|
||||||
above. There are in addition the following errors that are specific to
|
above. There are in addition the following errors that are specific to
|
||||||
pcre2_dfa_match():
|
pcre2_dfa_match():
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters an item in the
|
This return is given if pcre2_dfa_match() encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \C or a back
|
pattern that it does not support, for instance, the use of \C or a back
|
||||||
reference.
|
reference.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters a condition item
|
This return is given if pcre2_dfa_match() encounters a condition item
|
||||||
that uses a back reference for the condition, or a test for recursion
|
that uses a back reference for the condition, or a test for recursion
|
||||||
in a specific group. These are not supported.
|
in a specific group. These are not supported.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_WSSIZE
|
PCRE2_ERROR_DFA_WSSIZE
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() runs out of space in the
|
This return is given if pcre2_dfa_match() runs out of space in the
|
||||||
workspace vector.
|
workspace vector.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_RECURSE
|
PCRE2_ERROR_DFA_RECURSE
|
||||||
|
|
||||||
When a recursive subpattern is processed, the matching function calls
|
When a recursive subpattern is processed, the matching function calls
|
||||||
itself recursively, using private memory for the ovector and workspace.
|
itself recursively, using private memory for the ovector and workspace.
|
||||||
This error is given if the internal ovector is not large enough. This
|
This error is given if the internal ovector is not large enough. This
|
||||||
should be extremely rare, as a vector of size 1000 is used.
|
should be extremely rare, as a vector of size 1000 is used.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_BADRESTART
|
PCRE2_ERROR_DFA_BADRESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
||||||
some plausibility checks are made on the contents of the workspace,
|
some plausibility checks are made on the contents of the workspace,
|
||||||
which should contain data about the previous partial match. If any of
|
which should contain data about the previous partial match. If any of
|
||||||
these checks fail, this error is given.
|
these checks fail, this error is given.
|
||||||
|
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
|
|
||||||
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
||||||
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
||||||
pcre2stack(3).
|
pcre2stack(3).
|
||||||
|
|
||||||
|
|
||||||
|
@ -2527,7 +2601,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 03 November 2014
|
Last updated: 11 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3553,20 +3627,21 @@ CONTROLLING THE JIT STACK
|
||||||
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
||||||
should use. Its arguments are as follows:
|
should use. Its arguments are as follows:
|
||||||
|
|
||||||
pcre2_code *code
|
pcre2_match_context *mcontext
|
||||||
pcre2_jit_callback callback
|
pcre2_jit_callback callback
|
||||||
void *data
|
void *data
|
||||||
|
|
||||||
The code argument is a pointer to a compiled pattern, after it has been
|
The first argument is a pointer to a match context. When this is subse-
|
||||||
processed by pcre2_jit_compile(). There are three cases for the values
|
quently passed to a matching function, its information determines which
|
||||||
of the other two options:
|
JIT stack is used. There are three cases for the values of the other
|
||||||
|
two options:
|
||||||
|
|
||||||
(1) If callback is NULL and data is NULL, an internal 32K block
|
(1) If callback is NULL and data is NULL, an internal 32K block
|
||||||
on the machine stack is used.
|
on the machine stack is used.
|
||||||
|
|
||||||
(2) If callback is NULL and data is not NULL, data must be
|
(2) If callback is NULL and data is not NULL, data must be
|
||||||
a valid JIT stack, the result of calling pcre2_jit_stack_cre-
|
a pointer to a valid JIT stack, the result of calling
|
||||||
ate().
|
pcre2_jit_stack_create().
|
||||||
|
|
||||||
(3) If callback is not NULL, it must point to a function that is
|
(3) If callback is not NULL, it must point to a function that is
|
||||||
called with data as an argument at the start of matching, in
|
called with data as an argument at the start of matching, in
|
||||||
|
@ -3591,13 +3666,14 @@ CONTROLLING THE JIT STACK
|
||||||
application is thread-safe.
|
application is thread-safe.
|
||||||
|
|
||||||
Strictly speaking, even more is allowed. You can assign the same non-
|
Strictly speaking, even more is allowed. You can assign the same non-
|
||||||
NULL stack to any number of patterns as long as they are not used for
|
NULL stack to a match context that is used by any number of patterns,
|
||||||
matching by multiple threads at the same time. For example, you can
|
as long as they are not used for matching by multiple threads at the
|
||||||
assign the same stack to all compiled patterns, and use a global mutex
|
same time. For example, you could use the same stack in all compiled
|
||||||
in the callback to wait until the stack is available for use. However,
|
patterns, with a global mutex in the callback to wait until the stack
|
||||||
this is an inefficient solution, and not recommended.
|
is available for use. However, this is an inefficient solution, and not
|
||||||
|
recommended.
|
||||||
|
|
||||||
This is a suggestion for how a multithreaded program that needs to set
|
This is a suggestion for how a multithreaded program that needs to set
|
||||||
up non-default JIT stacks might operate:
|
up non-default JIT stacks might operate:
|
||||||
|
|
||||||
During thread initialization
|
During thread initialization
|
||||||
|
@ -3609,10 +3685,8 @@ CONTROLLING THE JIT STACK
|
||||||
Use a one-line callback function
|
Use a one-line callback function
|
||||||
return thread_local_var
|
return thread_local_var
|
||||||
|
|
||||||
All the functions described in this section do nothing if JIT is not
|
All the functions described in this section do nothing if JIT is not
|
||||||
available, and pcre2_jit_stack_assign() does nothing unless the code
|
available.
|
||||||
argument is non-NULL and points to a pcre2_code block that has been
|
|
||||||
successfully processed by pcre2_jit_compile().
|
|
||||||
|
|
||||||
|
|
||||||
JIT STACK FAQ
|
JIT STACK FAQ
|
||||||
|
@ -3620,66 +3694,66 @@ JIT STACK FAQ
|
||||||
(1) Why do we need JIT stacks?
|
(1) Why do we need JIT stacks?
|
||||||
|
|
||||||
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack
|
PCRE2 (and JIT) is a recursive, depth-first engine, so it needs a stack
|
||||||
where the local data of the current node is pushed before checking its
|
where the local data of the current node is pushed before checking its
|
||||||
child nodes. Allocating real machine stack on some platforms is diffi-
|
child nodes. Allocating real machine stack on some platforms is diffi-
|
||||||
cult. For example, the stack chain needs to be updated every time if we
|
cult. For example, the stack chain needs to be updated every time if we
|
||||||
extend the stack on PowerPC. Although it is possible, its updating
|
extend the stack on PowerPC. Although it is possible, its updating
|
||||||
time overhead decreases performance. So we do the recursion in memory.
|
time overhead decreases performance. So we do the recursion in memory.
|
||||||
|
|
||||||
(2) Why don't we simply allocate blocks of memory with malloc()?
|
(2) Why don't we simply allocate blocks of memory with malloc()?
|
||||||
|
|
||||||
Modern operating systems have a nice feature: they can reserve an
|
Modern operating systems have a nice feature: they can reserve an
|
||||||
address space instead of allocating memory. We can safely allocate mem-
|
address space instead of allocating memory. We can safely allocate mem-
|
||||||
ory pages inside this address space, so the stack could grow without
|
ory pages inside this address space, so the stack could grow without
|
||||||
moving memory data (this is important because of pointers). Thus we can
|
moving memory data (this is important because of pointers). Thus we can
|
||||||
allocate 1M address space, and use only a single memory page (usually
|
allocate 1M address space, and use only a single memory page (usually
|
||||||
4K) if that is enough. However, we can still grow up to 1M anytime if
|
4K) if that is enough. However, we can still grow up to 1M anytime if
|
||||||
needed.
|
needed.
|
||||||
|
|
||||||
(3) Who "owns" a JIT stack?
|
(3) Who "owns" a JIT stack?
|
||||||
|
|
||||||
The owner of the stack is the user program, not the JIT studied pattern
|
The owner of the stack is the user program, not the JIT studied pattern
|
||||||
or anything else. The user program must ensure that if a stack is used
|
or anything else. The user program must ensure that if a stack is being
|
||||||
by pcre2_match(), (that is, it is assigned to the pattern currently
|
used by pcre2_match(), (that is, it is assigned to a match context that
|
||||||
running), that stack must not be used by any other threads (to avoid
|
is passed to the pattern currently running), that stack must not be
|
||||||
overwriting the same memory area). The best practice for multithreaded
|
used by any other threads (to avoid overwriting the same memory area).
|
||||||
programs is to allocate a stack for each thread, and return this stack
|
The best practice for multithreaded programs is to allocate a stack for
|
||||||
through the JIT callback function.
|
each thread, and return this stack through the JIT callback function.
|
||||||
|
|
||||||
(4) When should a JIT stack be freed?
|
(4) When should a JIT stack be freed?
|
||||||
|
|
||||||
You can free a JIT stack at any time, as long as it will not be used by
|
You can free a JIT stack at any time, as long as it will not be used by
|
||||||
pcre2_match() again. When you assign the stack to a pattern, only a
|
pcre2_match() again. When you assign the stack to a match context, only
|
||||||
pointer is set. There is no reference counting or any other magic. You
|
a pointer is set. There is no reference counting or any other magic.
|
||||||
can free the patterns and stacks in any order, anytime. Just do not
|
You can free compiled patterns, contexts, and stacks in any order, any-
|
||||||
call pcre2_match() with a pattern pointing to an already freed stack,
|
time. Just do not call pcre2_match() with a match context pointing to
|
||||||
as that will cause SEGFAULT. (Also, do not free a stack currently used
|
an already freed stack, as that will cause SEGFAULT. (Also, do not free
|
||||||
by pcre2_match() in another thread). You can also replace the stack for
|
a stack currently used by pcre2_match() in another thread). You can
|
||||||
a pattern at any time. You can even free the previous stack before
|
also replace the stack in a context at any time when it is not in use.
|
||||||
assigning a replacement.
|
You can also free the previous stack before assigning a replacement.
|
||||||
|
|
||||||
(5) Should I allocate/free a stack every time before/after calling
|
(5) Should I allocate/free a stack every time before/after calling
|
||||||
pcre2_match()?
|
pcre2_match()?
|
||||||
|
|
||||||
No, because this is too costly in terms of resources. However, you
|
No, because this is too costly in terms of resources. However, you
|
||||||
could implement some clever idea which releases the stack if it is not
|
could implement some clever idea which releases the stack if it is not
|
||||||
used in let's say two minutes. The JIT callback can help to achieve
|
used in let's say two minutes. The JIT callback can help to achieve
|
||||||
this without keeping a list of the currently JIT studied patterns.
|
this without keeping a list of patterns.
|
||||||
|
|
||||||
(6) OK, the stack is for long term memory allocation. But what happens
|
(6) OK, the stack is for long term memory allocation. But what happens
|
||||||
if a pattern causes stack overflow with a stack of 1M? Is that 1M kept
|
if a pattern causes stack overflow with a stack of 1M? Is that 1M kept
|
||||||
until the stack is freed?
|
until the stack is freed?
|
||||||
|
|
||||||
Especially on embedded systems, it might be a good idea to release mem-
|
Especially on embedded systems, it might be a good idea to release mem-
|
||||||
ory sometimes without freeing the stack. There is no API for this at
|
ory sometimes without freeing the stack. There is no API for this at
|
||||||
the moment. Probably a function call which returns with the currently
|
the moment. Probably a function call which returns with the currently
|
||||||
allocated memory for any stack and another which allows releasing mem-
|
allocated memory for any stack and another which allows releasing mem-
|
||||||
ory (shrinking the stack) would be a good idea if someone needs this.
|
ory (shrinking the stack) would be a good idea if someone needs this.
|
||||||
|
|
||||||
(7) This is too much of a headache. Isn't there any better solution for
|
(7) This is too much of a headache. Isn't there any better solution for
|
||||||
JIT stack handling?
|
JIT stack handling?
|
||||||
|
|
||||||
No, thanks to Windows. If POSIX threads were used everywhere, we could
|
No, thanks to Windows. If POSIX threads were used everywhere, we could
|
||||||
throw out this complicated API.
|
throw out this complicated API.
|
||||||
|
|
||||||
|
|
||||||
|
@ -3688,36 +3762,39 @@ FREEING JIT SPECULATIVE MEMORY
|
||||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
The JIT executable allocator does not free all memory when it is possi-
|
The JIT executable allocator does not free all memory when it is possi-
|
||||||
ble. It expects new allocations, and keeps some free memory around to
|
ble. It expects new allocations, and keeps some free memory around to
|
||||||
improve allocation speed. However, in low memory conditions, it might
|
improve allocation speed. However, in low memory conditions, it might
|
||||||
be better to free all possible memory. You can cause this to happen by
|
be better to free all possible memory. You can cause this to happen by
|
||||||
calling pcre2_jit_free_unused_memory(). Its argument is a general con-
|
calling pcre2_jit_free_unused_memory(). Its argument is a general con-
|
||||||
text, for custom memory management, or NULL for standard memory manage-
|
text, for custom memory management, or NULL for standard memory manage-
|
||||||
ment.
|
ment.
|
||||||
|
|
||||||
|
|
||||||
EXAMPLE CODE
|
EXAMPLE CODE
|
||||||
|
|
||||||
This is a single-threaded example that specifies a JIT stack without
|
This is a single-threaded example that specifies a JIT stack without
|
||||||
using a callback.
|
using a callback. A real program should include error checking after
|
||||||
|
all the function calls.
|
||||||
|
|
||||||
int rc;
|
int rc;
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
pcre2_match_data *match_data;
|
pcre2_match_data *match_data;
|
||||||
|
pcre2_match_context *mcontext;
|
||||||
pcre2_jit_stack *jit_stack;
|
pcre2_jit_stack *jit_stack;
|
||||||
|
|
||||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||||
&errornumber, &erroffset, NULL);
|
&errornumber, &erroffset, NULL);
|
||||||
/* Check for errors */
|
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
mcontext = pcre2_match_context_create(NULL);
|
||||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
pcre2_jit_stack_assign(mcontext, NULL, jit_stack);
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
rc = pcre2_match(re, subject, length, 0, 0, match_data, mcontext);
|
||||||
/* Check results */
|
/* Process result */
|
||||||
pcre2_free(re);
|
|
||||||
|
pcre2_code_free(re);
|
||||||
|
pcre2_match_data_free(match_data);
|
||||||
|
pcre2_match_context_free(mcontext);
|
||||||
pcre2_jit_stack_free(jit_stack);
|
pcre2_jit_stack_free(jit_stack);
|
||||||
|
|
||||||
|
|
||||||
|
@ -3733,14 +3810,10 @@ JIT FAST PATH API
|
||||||
patterns that have been successfully processed by pcre2_jit_compile()).
|
patterns that have been successfully processed by pcre2_jit_compile()).
|
||||||
|
|
||||||
The fast path function is called pcre2_jit_match(), and it takes
|
The fast path function is called pcre2_jit_match(), and it takes
|
||||||
exactly the same arguments as pcre2_match(), plus one additional argu-
|
exactly the same arguments as pcre2_match(). The return values are also
|
||||||
ment that must either point to a JIT stack or be NULL. In the latter
|
the same, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or
|
||||||
case, if a callback function has been set up by
|
complete) is requested that was not compiled. Unsupported option bits
|
||||||
pcre2_jit_stack_assign(), it is called. Otherwise the system stack is
|
(for example, PCRE2_ANCHORED) are ignored.
|
||||||
used. The return values are the same as for pcre2_match(), plus
|
|
||||||
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
|
||||||
requested that was not compiled. Unsupported option bits (for example,
|
|
||||||
PCRE2_ANCHORED) are ignored.
|
|
||||||
|
|
||||||
When you call pcre2_match(), as well as testing for invalid options, a
|
When you call pcre2_match(), as well as testing for invalid options, a
|
||||||
number of other sanity checks are performed on the arguments. For exam-
|
number of other sanity checks are performed on the arguments. For exam-
|
||||||
|
@ -3767,7 +3840,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 03 November 2014
|
Last updated: 08 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
.TH PCRE2_SUBSTITUTE 3 "11 November 2014" "PCRE2 10.00"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
|
||||||
|
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||||
|
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||||
|
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||||
|
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||||
|
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function matches a compiled regular expression against a given subject
|
||||||
|
string, using a matching algorithm that is similar to Perl's. It then makes a
|
||||||
|
copy of the subject, substituting a replacement string for what was matched.
|
||||||
|
Its arguments are:
|
||||||
|
.sp
|
||||||
|
\fIcode\fP Points to the compiled pattern
|
||||||
|
\fIsubject\fP Points to the subject string
|
||||||
|
\fIlength\fP Length of the subject string
|
||||||
|
\fIstartoffset\fP Offset in the subject at which to start matching
|
||||||
|
\fIoptions\fP Option bits
|
||||||
|
\fImatch_data\fP Points to a match data block, or is NULL
|
||||||
|
\fImcontext\fP Points to a match context, or is NULL
|
||||||
|
\fIreplacement\fP Points to the replacement string
|
||||||
|
\fIrlength\fP Length of the replacement string
|
||||||
|
\fIoutputbuffer\fP Points to the output buffer
|
||||||
|
\fIoutlengthptr\fP Points to the length of the output buffer
|
||||||
|
.sp
|
||||||
|
A match context is needed only if you want to:
|
||||||
|
.sp
|
||||||
|
Set up a callout function
|
||||||
|
Change the limit for calling the internal function \fImatch()\fP
|
||||||
|
Change the limit for calling \fImatch()\fP recursively
|
||||||
|
Set custom memory management when the heap is used for recursion
|
||||||
|
.sp
|
||||||
|
The \fIlength\fP, \fIstartoffset\fP and \fIrlength\fP values are code
|
||||||
|
units, not characters, as is the contents of the variable pointed at by
|
||||||
|
\fIoutlengthptr\fP, which is updated to the actual length of the new string.
|
||||||
|
The options are:
|
||||||
|
.sp
|
||||||
|
PCRE2_ANCHORED Match only at the first position
|
||||||
|
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||||
|
PCRE2_NOTEOL Subject string is not the end of a line
|
||||||
|
PCRE2_NOTEMPTY An empty string is not a valid match
|
||||||
|
PCRE2_NOTEMPTY_ATSTART An empty string at the start of the subject
|
||||||
|
is not a valid match
|
||||||
|
PCRE2_NO_UTF_CHECK Do not check the subject or replacement for
|
||||||
|
UTF validity (only relevant if PCRE2_UTF
|
||||||
|
was set at compile time)
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject
|
||||||
|
.sp
|
||||||
|
The function returns the number of substitutions, which may be zero if there
|
||||||
|
were no matches. The result can be greater than one only when
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "08 November 2014" "PCRE2 10.00"
|
.TH PCRE2API 3 "11 November 2014" "PCRE2 10.00"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -169,6 +169,19 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.fi
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SH "PCRE2 NATIVE API STRING SUBSTITUTION FUNCTION"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
|
||||||
|
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||||
|
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||||
|
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||||
|
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||||
|
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "PCRE2 NATIVE API JIT FUNCTIONS"
|
.SH "PCRE2 NATIVE API JIT FUNCTIONS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -332,8 +345,8 @@ and disadvantages is given in the
|
||||||
documentation. There is no JIT support for \fBpcre2_dfa_match()\fP.
|
documentation. There is no JIT support for \fBpcre2_dfa_match()\fP.
|
||||||
.P
|
.P
|
||||||
In addition to the main compiling and matching functions, there are convenience
|
In addition to the main compiling and matching functions, there are convenience
|
||||||
functions for extracting captured substrings from a subject string that is
|
functions for extracting captured substrings from a subject string that has
|
||||||
matched by \fBpcre2_match()\fP. They are:
|
been matched by \fBpcre2_match()\fP. They are:
|
||||||
.sp
|
.sp
|
||||||
\fBpcre2_substring_copy_byname()\fP
|
\fBpcre2_substring_copy_byname()\fP
|
||||||
\fBpcre2_substring_copy_bynumber()\fP
|
\fBpcre2_substring_copy_bynumber()\fP
|
||||||
|
@ -348,9 +361,13 @@ matched by \fBpcre2_match()\fP. They are:
|
||||||
\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also
|
\fBpcre2_substring_free()\fP and \fBpcre2_substring_list_free()\fP are also
|
||||||
provided, to free the memory used for extracted strings.
|
provided, to free the memory used for extracted strings.
|
||||||
.P
|
.P
|
||||||
There are functions for finding out information about a compiled pattern
|
The function \fBpcre2_substitute()\fP can be called to match a pattern and
|
||||||
(\fBpcre2_pattern_info()\fP) and about the configuration with which PCRE2 was
|
return a copy of the subject string with substitutions for parts that were
|
||||||
built (\fBpcre2_config()\fP).
|
matched.
|
||||||
|
.P
|
||||||
|
Finally, there are functions for finding out information about a compiled
|
||||||
|
pattern (\fBpcre2_pattern_info()\fP) and about the configuration with which
|
||||||
|
PCRE2 was built (\fBpcre2_config()\fP).
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="newlines"></a>
|
.\" HTML <a name="newlines"></a>
|
||||||
|
@ -2361,6 +2378,66 @@ numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.SH "CREATING A NEW STRING WITH SUBSTITUTIONS"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
|
.B int pcre2_substitute(const pcre2_code *\fIcode\fP, PCRE2_SPTR \fIsubject\fP,
|
||||||
|
.B " PCRE2_SIZE \fIlength\fP, PCRE2_SIZE \fIstartoffset\fP,"
|
||||||
|
.B " uint32_t \fIoptions\fP, pcre2_match_data *\fImatch_data\fP,"
|
||||||
|
.B " pcre2_match_context *\fImcontext\fP, PCRE2_SPTR \fIreplacement\fP,"
|
||||||
|
.B " PCRE2_SIZE \fIrlength\fP, PCRE2_UCHAR *\fIoutputbuffer\fP,"
|
||||||
|
.B " PCRE2_SIZE *\fIoutlengthptr\fP);"
|
||||||
|
.fi
|
||||||
|
This function calls \fBpcre2_match()\fP and then makes a copy of the subject
|
||||||
|
string in \fIoutputbuffer\fP, replacing the part that was matched with the
|
||||||
|
\fIreplacement\fP string, whose length is supplied in \fIrlength\fP. This can
|
||||||
|
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||||
|
.P
|
||||||
|
In the replacement string, which is interpreted as a UTF string in UTF mode,
|
||||||
|
and is checked for UTF validity unless the PCRE2_NO_UTF_CHECK option is set, a
|
||||||
|
dollar character is an escape character that can specify the insertion of
|
||||||
|
characters from capturing groups in the pattern. The following forms are
|
||||||
|
recognized:
|
||||||
|
.sp
|
||||||
|
$$ insert a dollar character
|
||||||
|
$<n> insert the contents of group <n>
|
||||||
|
${<n>} insert the contents of group <n>
|
||||||
|
.sp
|
||||||
|
Either a group number or a group name can be given for <n>. Curly brackets are
|
||||||
|
required only if the following character would be interpreted as part of the
|
||||||
|
number or name. The number may be zero to include the entire matched string.
|
||||||
|
For example, if the pattern a(b)c is matched with "[abc]" and the replacement
|
||||||
|
string "+$1$0$1+", the result is "[+babcb+]". Group insertion is done by
|
||||||
|
calling \fBpcre2_substring_copy_byname()\fP or \fBpcre2_substring_copy_bynumber()\fP as
|
||||||
|
appropriate.
|
||||||
|
.P
|
||||||
|
The first seven arguments of \fBpcre2_substitute()\fP are the same as for
|
||||||
|
\fBpcre2_match()\fP, except that the partial matching options are not
|
||||||
|
permitted, and \fImatch_data\fP may be passed as NULL, in which case a match
|
||||||
|
data block is obtained and freed within this function, using memory management
|
||||||
|
functions from the match context, if provided, or else those that were used to
|
||||||
|
allocate memory for the compiled code.
|
||||||
|
.P
|
||||||
|
There is one additional option, PCRE2_SUBSTITUTE_GLOBAL, which causes the
|
||||||
|
function to iterate over the subject string, replacing every matching
|
||||||
|
substring. If this is not set, only the first matching substring is replaced.
|
||||||
|
.P
|
||||||
|
The \fIoutlengthptr\fP argument must point to a variable that contains the
|
||||||
|
length, in code units, of the output buffer. It is updated to contain the
|
||||||
|
length of the new string, excluding the trailing zero that is automatically
|
||||||
|
added.
|
||||||
|
.P
|
||||||
|
The function returns the number of replacements that were made. This may be
|
||||||
|
zero if no matches were found, and is never greater than 1 unless
|
||||||
|
PCRE2_SUBSTITUTE_GLOBAL is set. In the event of an error, a negative error code
|
||||||
|
is returned. Except for PCRE2_ERROR_NOMATCH (which is never returned), any
|
||||||
|
errors from \fBpcre2_match()\fP or the substring copying functions are passed
|
||||||
|
straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
||||||
|
replacement string (unrecognized sequence following a dollar sign), and
|
||||||
|
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -2633,6 +2710,6 @@ Cambridge CB2 3QH, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 November 2014
|
Last updated: 11 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -435,7 +435,6 @@ PATTERN MODIFIERS
|
||||||
posix use the POSIX API
|
posix use the POSIX API
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
use_length use the pattern's length
|
|
||||||
|
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
@ -480,17 +479,14 @@ PATTERN MODIFIERS
|
||||||
/ab 32 59/hex
|
/ab 32 59/hex
|
||||||
|
|
||||||
This feature is provided as a way of creating patterns that contain
|
This feature is provided as a way of creating patterns that contain
|
||||||
binary zero characters. When hex is set, it implies use_length.
|
binary zero characters. By default, pcre2test passes patterns as zero-
|
||||||
|
terminated strings to pcre2_compile(), giving the length as
|
||||||
Using the pattern's length
|
PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal,
|
||||||
|
the length of the pattern is passed.
|
||||||
By default, pcre2test passes patterns as zero-terminated strings to
|
|
||||||
pcre2_compile(), giving the length as -1. If use_length is set, the
|
|
||||||
length of the pattern is passed. This is implied if hex is set.
|
|
||||||
|
|
||||||
JIT compilation
|
JIT compilation
|
||||||
|
|
||||||
The /jit modifier may optionally be followed by an equals sign and a
|
The /jit modifier may optionally be followed by an equals sign and a
|
||||||
number in the range 0 to 7:
|
number in the range 0 to 7:
|
||||||
|
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
|
@ -501,23 +497,23 @@ PATTERN MODIFIERS
|
||||||
6 use JIT for soft and hard partial match
|
6 use JIT for soft and hard partial match
|
||||||
7 all three modes
|
7 all three modes
|
||||||
|
|
||||||
If no number is given, 7 is assumed. If JIT compilation is successful,
|
If no number is given, 7 is assumed. If JIT compilation is successful,
|
||||||
the compiled JIT code will automatically be used when pcre2_match() is
|
the compiled JIT code will automatically be used when pcre2_match() is
|
||||||
run for the appropriate type of match, except when incompatible run-
|
run for the appropriate type of match, except when incompatible run-
|
||||||
time options are specified. For more details, see the pcre2jit documen-
|
time options are specified. For more details, see the pcre2jit documen-
|
||||||
tation. See also the jitstack modifier below for a way of setting the
|
tation. See also the jitstack modifier below for a way of setting the
|
||||||
size of the JIT stack.
|
size of the JIT stack.
|
||||||
|
|
||||||
If the jitfast modifier is specified, matching is done using the JIT
|
If the jitfast modifier is specified, matching is done using the JIT
|
||||||
"fast path" interface (pcre2_jit_match()), which skips some of the san-
|
"fast path" interface (pcre2_jit_match()), which skips some of the san-
|
||||||
ity checks that are done by pcre2_match(), and of course does not work
|
ity checks that are done by pcre2_match(), and of course does not work
|
||||||
when JIT is not supported. If jitfast is specified without jit, jit=7
|
when JIT is not supported. If jitfast is specified without jit, jit=7
|
||||||
is assumed.
|
is assumed.
|
||||||
|
|
||||||
If the jitverify modifier is specified, information about the compiled
|
If the jitverify modifier is specified, information about the compiled
|
||||||
pattern shows whether JIT compilation was or was not successful. If
|
pattern shows whether JIT compilation was or was not successful. If
|
||||||
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
||||||
tion is successful when jitverify is set, the text "(JIT)" is added to
|
tion is successful when jitverify is set, the text "(JIT)" is added to
|
||||||
the first output line after a match or non match when JIT-compiled code
|
the first output line after a match or non match when JIT-compiled code
|
||||||
was actually used.
|
was actually used.
|
||||||
|
|
||||||
|
@ -528,33 +524,33 @@ PATTERN MODIFIERS
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
|
|
||||||
The given locale is set, pcre2_maketables() is called to build a set of
|
The given locale is set, pcre2_maketables() is called to build a set of
|
||||||
character tables for the locale, and this is then passed to pcre2_com-
|
character tables for the locale, and this is then passed to pcre2_com-
|
||||||
pile() when compiling the regular expression. The same tables are used
|
pile() when compiling the regular expression. The same tables are used
|
||||||
when matching the following subject lines. The /locale modifier applies
|
when matching the following subject lines. The /locale modifier applies
|
||||||
only to the pattern on which it appears, but can be given in a #pattern
|
only to the pattern on which it appears, but can be given in a #pattern
|
||||||
command if a default is needed. Setting a locale and alternate charac-
|
command if a default is needed. Setting a locale and alternate charac-
|
||||||
ter tables are mutually exclusive.
|
ter tables are mutually exclusive.
|
||||||
|
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
|
|
||||||
The /memory modifier causes the size in bytes of the memory block used
|
The /memory modifier causes the size in bytes of the memory block used
|
||||||
to hold the compiled pattern to be output. This does not include the
|
to hold the compiled pattern to be output. This does not include the
|
||||||
size of the pcre2_code block; it is just the actual compiled data. If
|
size of the pcre2_code block; it is just the actual compiled data. If
|
||||||
the pattern is subsequently passed to the JIT compiler, the size of the
|
the pattern is subsequently passed to the JIT compiler, the size of the
|
||||||
JIT compiled code is also output.
|
JIT compiled code is also output.
|
||||||
|
|
||||||
Limiting nested parentheses
|
Limiting nested parentheses
|
||||||
|
|
||||||
The parens_nest_limit modifier sets a limit on the depth of nested
|
The parens_nest_limit modifier sets a limit on the depth of nested
|
||||||
parentheses in a pattern. Breaching the limit causes a compilation
|
parentheses in a pattern. Breaching the limit causes a compilation
|
||||||
error. The default for the library is set when PCRE2 is built, but
|
error. The default for the library is set when PCRE2 is built, but
|
||||||
pcre2test sets its own default of 220, which is required for running
|
pcre2test sets its own default of 220, which is required for running
|
||||||
the standard test suite.
|
the standard test suite.
|
||||||
|
|
||||||
Using the POSIX wrapper API
|
Using the POSIX wrapper API
|
||||||
|
|
||||||
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
||||||
per API rather than its native API. This supports only the 8-bit
|
per API rather than its native API. This supports only the 8-bit
|
||||||
library. When the POSIX API is being used, the following pattern modi-
|
library. When the POSIX API is being used, the following pattern modi-
|
||||||
fiers set options for the regcomp() function:
|
fiers set options for the regcomp() function:
|
||||||
|
|
||||||
|
@ -566,25 +562,25 @@ PATTERN MODIFIERS
|
||||||
ucp REG_UCP ) the POSIX standard
|
ucp REG_UCP ) the POSIX standard
|
||||||
utf REG_UTF8 )
|
utf REG_UTF8 )
|
||||||
|
|
||||||
The aftertext and allaftertext subject modifiers work as described
|
The aftertext and allaftertext subject modifiers work as described
|
||||||
below. All other modifiers cause an error.
|
below. All other modifiers cause an error.
|
||||||
|
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
|
|
||||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
The /stackguard modifier is used to test the use of pcre2_set_com-
|
||||||
pile_recursion_guard(), a function that is provided to enable stack
|
pile_recursion_guard(), a function that is provided to enable stack
|
||||||
availability to be checked during compilation (see the pcre2api docu-
|
availability to be checked during compilation (see the pcre2api docu-
|
||||||
mentation for details). If the number specified by the modifier is
|
mentation for details). If the number specified by the modifier is
|
||||||
greater than zero, pcre2_set_compile_recursion_guard() is called to set
|
greater than zero, pcre2_set_compile_recursion_guard() is called to set
|
||||||
up a callback from pcre2_compile() to a local function. The argument it
|
up a callback from pcre2_compile() to a local function. The argument it
|
||||||
is passed is the current nesting parenthesis depth; if this is greater
|
is passed is the current nesting parenthesis depth; if this is greater
|
||||||
than the value given by the modifier, non-zero is returned, causing the
|
than the value given by the modifier, non-zero is returned, causing the
|
||||||
compilation to be aborted.
|
compilation to be aborted.
|
||||||
|
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
|
|
||||||
The /tables modifier must be followed by a single digit. It causes a
|
The /tables modifier must be followed by a single digit. It causes a
|
||||||
specific set of built-in character tables to be passed to pcre2_com-
|
specific set of built-in character tables to be passed to pcre2_com-
|
||||||
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
||||||
ent character tables. The digit specifies the tables as follows:
|
ent character tables. The digit specifies the tables as follows:
|
||||||
|
|
||||||
|
@ -593,15 +589,15 @@ PATTERN MODIFIERS
|
||||||
pcre2_chartables.c.dist
|
pcre2_chartables.c.dist
|
||||||
2 a set of tables defining ISO 8859 characters
|
2 a set of tables defining ISO 8859 characters
|
||||||
|
|
||||||
In table 2, some characters whose codes are greater than 128 are iden-
|
In table 2, some characters whose codes are greater than 128 are iden-
|
||||||
tified as letters, digits, spaces, etc. Setting alternate character
|
tified as letters, digits, spaces, etc. Setting alternate character
|
||||||
tables and a locale are mutually exclusive.
|
tables and a locale are mutually exclusive.
|
||||||
|
|
||||||
Setting certain match controls
|
Setting certain match controls
|
||||||
|
|
||||||
The following modifiers are really subject modifiers, and are described
|
The following modifiers are really subject modifiers, and are described
|
||||||
below. However, they may be included in a pattern's modifier list, in
|
below. However, they may be included in a pattern's modifier list, in
|
||||||
which case they are applied to every subject line that is processed
|
which case they are applied to every subject line that is processed
|
||||||
with that pattern. They do not affect the compilation process.
|
with that pattern. They do not affect the compilation process.
|
||||||
|
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
|
@ -612,7 +608,7 @@ PATTERN MODIFIERS
|
||||||
mark show mark values
|
mark show mark values
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
|
|
||||||
These modifiers may not appear in a #pattern command. If you want them
|
These modifiers may not appear in a #pattern command. If you want them
|
||||||
as defaults, set them in a #subject command.
|
as defaults, set them in a #subject command.
|
||||||
|
|
||||||
|
|
||||||
|
@ -623,7 +619,7 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
Setting match options
|
Setting match options
|
||||||
|
|
||||||
The following modifiers set options for pcre2_match() or
|
The following modifiers set options for pcre2_match() or
|
||||||
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
||||||
|
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
|
@ -637,20 +633,20 @@ SUBJECT MODIFIERS
|
||||||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||||
|
|
||||||
The partial matching modifiers are provided with abbreviations because
|
The partial matching modifiers are provided with abbreviations because
|
||||||
they appear frequently in tests.
|
they appear frequently in tests.
|
||||||
|
|
||||||
If the /posix modifier was present on the pattern, causing the POSIX
|
If the /posix modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any
|
wrapper API to be used, the only option-setting modifiers that have any
|
||||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||||
Any other modifiers cause an error.
|
Any other modifiers cause an error.
|
||||||
|
|
||||||
Setting match controls
|
Setting match controls
|
||||||
|
|
||||||
The following modifiers affect the matching process or request addi-
|
The following modifiers affect the matching process or request addi-
|
||||||
tional information. Some of them may also be specified on a pattern
|
tional information. Some of them may also be specified on a pattern
|
||||||
line (see above), in which case they apply to every subject line that
|
line (see above), in which case they apply to every subject line that
|
||||||
is matched against that pattern.
|
is matched against that pattern.
|
||||||
|
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
|
@ -676,29 +672,30 @@ SUBJECT MODIFIERS
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
|
zero_terminate pass the subject as zero-terminated
|
||||||
|
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
|
||||||
Showing more text
|
Showing more text
|
||||||
|
|
||||||
The aftertext modifier requests that as well as outputting the sub-
|
The aftertext modifier requests that as well as outputting the sub-
|
||||||
string that matched the entire pattern, pcre2test should in addition
|
string that matched the entire pattern, pcre2test should in addition
|
||||||
output the remainder of the subject string. This is useful for tests
|
output the remainder of the subject string. This is useful for tests
|
||||||
where the subject contains multiple copies of the same substring. The
|
where the subject contains multiple copies of the same substring. The
|
||||||
allaftertext modifier requests the same action for captured substrings
|
allaftertext modifier requests the same action for captured substrings
|
||||||
as well as the main matched substring. In each case the remainder is
|
as well as the main matched substring. In each case the remainder is
|
||||||
output on the following line with a plus character following the cap-
|
output on the following line with a plus character following the cap-
|
||||||
ture number.
|
ture number.
|
||||||
|
|
||||||
The allusedtext modifier requests that all the text that was consulted
|
The allusedtext modifier requests that all the text that was consulted
|
||||||
during a successful pattern match by the interpreter should be shown.
|
during a successful pattern match by the interpreter should be shown.
|
||||||
This feature is not supported for JIT matching, and if requested with
|
This feature is not supported for JIT matching, and if requested with
|
||||||
JIT it is ignored (with a warning message). Setting this modifier
|
JIT it is ignored (with a warning message). Setting this modifier
|
||||||
affects the output if there is a lookbehind at the start of a match, or
|
affects the output if there is a lookbehind at the start of a match, or
|
||||||
a lookahead at the end, or if \K is used in the pattern. Characters
|
a lookahead at the end, or if \K is used in the pattern. Characters
|
||||||
that precede or follow the start and end of the actual match are indi-
|
that precede or follow the start and end of the actual match are indi-
|
||||||
cated in the output by '<' or '>' characters underneath them. Here is
|
cated in the output by '<' or '>' characters underneath them. Here is
|
||||||
an example:
|
an example:
|
||||||
|
|
||||||
re> /(?<=pqr)abc(?=xyz)/
|
re> /(?<=pqr)abc(?=xyz)/
|
||||||
|
@ -706,15 +703,15 @@ SUBJECT MODIFIERS
|
||||||
0: pqrabcxyz
|
0: pqrabcxyz
|
||||||
<<< >>>
|
<<< >>>
|
||||||
|
|
||||||
This shows that the matched string is "abc", with the preceding and
|
This shows that the matched string is "abc", with the preceding and
|
||||||
following strings "pqr" and "xyz" also consulted during the match.
|
following strings "pqr" and "xyz" also consulted during the match.
|
||||||
|
|
||||||
The startchar modifier requests that the starting character for the
|
The startchar modifier requests that the starting character for the
|
||||||
match be indicated, if it is different to the start of the matched
|
match be indicated, if it is different to the start of the matched
|
||||||
string. The only time when this occurs is when \K has been processed as
|
string. The only time when this occurs is when \K has been processed as
|
||||||
part of the match. In this situation, the output for the matched string
|
part of the match. In this situation, the output for the matched string
|
||||||
is displayed from the starting character instead of from the match
|
is displayed from the starting character instead of from the match
|
||||||
point, with circumflex characters under the earlier characters. For
|
point, with circumflex characters under the earlier characters. For
|
||||||
example:
|
example:
|
||||||
|
|
||||||
re> /abc\Kxyz/
|
re> /abc\Kxyz/
|
||||||
|
@ -722,7 +719,7 @@ SUBJECT MODIFIERS
|
||||||
0: abcxyz
|
0: abcxyz
|
||||||
^^^
|
^^^
|
||||||
|
|
||||||
Unlike allusedtext, the startchar modifier can be used with JIT. How-
|
Unlike allusedtext, the startchar modifier can be used with JIT. How-
|
||||||
ever, these two modifiers are mutually exclusive.
|
ever, these two modifiers are mutually exclusive.
|
||||||
|
|
||||||
Showing the value of all capture groups
|
Showing the value of all capture groups
|
||||||
|
@ -730,171 +727,183 @@ SUBJECT MODIFIERS
|
||||||
The allcaptures modifier requests that the values of all potential cap-
|
The allcaptures modifier requests that the values of all potential cap-
|
||||||
tured parentheses be output after a match. By default, only those up to
|
tured parentheses be output after a match. By default, only those up to
|
||||||
the highest one actually used in the match are output (corresponding to
|
the highest one actually used in the match are output (corresponding to
|
||||||
the return code from pcre2_match()). Groups that did not take part in
|
the return code from pcre2_match()). Groups that did not take part in
|
||||||
the match are output as "<unset>".
|
the match are output as "<unset>".
|
||||||
|
|
||||||
Testing callouts
|
Testing callouts
|
||||||
|
|
||||||
A callout function is supplied when pcre2test calls the library match-
|
A callout function is supplied when pcre2test calls the library match-
|
||||||
ing functions, unless callout_none is specified. If callout_capture is
|
ing functions, unless callout_none is specified. If callout_capture is
|
||||||
set, the current captured groups are output when a callout occurs.
|
set, the current captured groups are output when a callout occurs.
|
||||||
|
|
||||||
The callout_fail modifier can be given one or two numbers. If there is
|
The callout_fail modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that num-
|
only one number, 1 is returned instead of 0 when a callout of that num-
|
||||||
ber is reached. If two numbers are given, 1 is returned when callout
|
ber is reached. If two numbers are given, 1 is returned when callout
|
||||||
<n> is reached for the <m>th time.
|
<n> is reached for the <m>th time.
|
||||||
|
|
||||||
The callout_data modifier can be given an unsigned or a negative num-
|
The callout_data modifier can be given an unsigned or a negative num-
|
||||||
ber. Any value other than zero is used as a return from pcre2test's
|
ber. Any value other than zero is used as a return from pcre2test's
|
||||||
callout function.
|
callout function.
|
||||||
|
|
||||||
Testing substring extraction functions
|
Testing substring extraction functions
|
||||||
|
|
||||||
The copy and get modifiers can be used to test the pcre2_sub-
|
The copy and get modifiers can be used to test the pcre2_sub-
|
||||||
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
|
string_copy_xxx() and pcre2_substring_get_xxx() functions. They can be
|
||||||
given more than once, and each can specify a group name or number, for
|
given more than once, and each can specify a group name or number, for
|
||||||
example:
|
example:
|
||||||
|
|
||||||
abcd\=copy=1,copy=3,get=G1
|
abcd\=copy=1,copy=3,get=G1
|
||||||
|
|
||||||
If the #subject command is used to set default copy and get lists,
|
If the #subject command is used to set default copy and get lists,
|
||||||
these can be unset by specifying a negative number for numbered groups
|
these can be unset by specifying a negative number for numbered groups
|
||||||
and an empty name for named groups.
|
and an empty name for named groups.
|
||||||
|
|
||||||
The getall modifier tests pcre2_substring_list_get(), which extracts
|
The getall modifier tests pcre2_substring_list_get(), which extracts
|
||||||
all captured substrings.
|
all captured substrings.
|
||||||
|
|
||||||
If the subject line is successfully matched, the substrings extracted
|
If the subject line is successfully matched, the substrings extracted
|
||||||
by the convenience functions are output with C, G, or L after the
|
by the convenience functions are output with C, G, or L after the
|
||||||
string number instead of a colon. This is in addition to the normal
|
string number instead of a colon. This is in addition to the normal
|
||||||
full list. The string length (that is, the return from the extraction
|
full list. The string length (that is, the return from the extraction
|
||||||
function) is given in parentheses after each substring.
|
function) is given in parentheses after each substring.
|
||||||
|
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
Searching for all possible matches within a subject can be requested by
|
Searching for all possible matches within a subject can be requested by
|
||||||
the global or /altglobal modifier. After finding a match, the matching
|
the global or /altglobal modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The
|
function is called again to search the remainder of the subject. The
|
||||||
difference between global and altglobal is that the former uses the
|
difference between global and altglobal is that the former uses the
|
||||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||||
searching at a new point within the entire string (which is what Perl
|
searching at a new point within the entire string (which is what Perl
|
||||||
does), whereas the latter passes over a shortened substring. This makes
|
does), whereas the latter passes over a shortened substring. This makes
|
||||||
a difference to the matching process if the pattern begins with a look-
|
a difference to the matching process if the pattern begins with a look-
|
||||||
behind assertion (including \b or \B).
|
behind assertion (including \b or \B).
|
||||||
|
|
||||||
If an empty string is matched, the next match is done with the
|
If an empty string is matched, the next match is done with the
|
||||||
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
||||||
for another, non-empty, match at the same point in the subject. If this
|
for another, non-empty, match at the same point in the subject. If this
|
||||||
match fails, the start offset is advanced, and the normal match is
|
match fails, the start offset is advanced, and the normal match is
|
||||||
retried. This imitates the way Perl handles such cases when using the
|
retried. This imitates the way Perl handles such cases when using the
|
||||||
/g modifier or the split() function. Normally, the start offset is
|
/g modifier or the split() function. Normally, the start offset is
|
||||||
advanced by one character, but if the newline convention recognizes
|
advanced by one character, but if the newline convention recognizes
|
||||||
CRLF as a newline, and the current character is CR followed by LF, an
|
CRLF as a newline, and the current character is CR followed by LF, an
|
||||||
advance of two is used.
|
advance of two is used.
|
||||||
|
|
||||||
Setting the JIT stack size
|
Setting the JIT stack size
|
||||||
|
|
||||||
The jitstack modifier provides a way of setting the maximum stack size
|
The jitstack modifier provides a way of setting the maximum stack size
|
||||||
that is used by the just-in-time optimization code. It is ignored if
|
that is used by the just-in-time optimization code. It is ignored if
|
||||||
JIT optimization is not being used. The value is a number of kilobytes.
|
JIT optimization is not being used. The value is a number of kilobytes.
|
||||||
Providing a stack that is larger than the default 32K is necessary only
|
Providing a stack that is larger than the default 32K is necessary only
|
||||||
for very complicated patterns.
|
for very complicated patterns.
|
||||||
|
|
||||||
Setting match and recursion limits
|
Setting match and recursion limits
|
||||||
|
|
||||||
The match_limit and recursion_limit modifiers set the appropriate lim-
|
The match_limit and recursion_limit modifiers set the appropriate lim-
|
||||||
its in the match context. These values are ignored when the find_limits
|
its in the match context. These values are ignored when the find_limits
|
||||||
modifier is specified.
|
modifier is specified.
|
||||||
|
|
||||||
Finding minimum limits
|
Finding minimum limits
|
||||||
|
|
||||||
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
||||||
several times, setting different values in the match context via
|
several times, setting different values in the match context via
|
||||||
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
||||||
the minimum values for each parameter that allow pcre2_match() to com-
|
the minimum values for each parameter that allow pcre2_match() to com-
|
||||||
plete without error.
|
plete without error.
|
||||||
|
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching
|
If JIT is being used, only the match limit is relevant. If DFA matching
|
||||||
is being used, neither limit is relevant, and this modifier is ignored
|
is being used, neither limit is relevant, and this modifier is ignored
|
||||||
(with a warning message).
|
(with a warning message).
|
||||||
|
|
||||||
The match_limit number is a measure of the amount of backtracking that
|
The match_limit number is a measure of the amount of backtracking that
|
||||||
takes place, and learning the minimum value can be instructive. For
|
takes place, and learning the minimum value can be instructive. For
|
||||||
most simple matches, the number is quite small, but for patterns with
|
most simple matches, the number is quite small, but for patterns with
|
||||||
very large numbers of matching possibilities, it can become large very
|
very large numbers of matching possibilities, it can become large very
|
||||||
quickly with increasing length of subject string. The
|
quickly with increasing length of subject string. The
|
||||||
recursion_limit number is a measure of how much stack (or, if
|
recursion_limit number is a measure of how much stack (or, if
|
||||||
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
||||||
complete the match attempt.
|
complete the match attempt.
|
||||||
|
|
||||||
Showing MARK names
|
Showing MARK names
|
||||||
|
|
||||||
|
|
||||||
The mark modifier causes the names from backtracking control verbs that
|
The mark modifier causes the names from backtracking control verbs that
|
||||||
are returned from calls to pcre2_match() to be displayed. If a mark is
|
are returned from calls to pcre2_match() to be displayed. If a mark is
|
||||||
returned for a match, non-match, or partial match, pcre2test shows it.
|
returned for a match, non-match, or partial match, pcre2test shows it.
|
||||||
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
||||||
it is added to the non-match message.
|
it is added to the non-match message.
|
||||||
|
|
||||||
Showing memory usage
|
Showing memory usage
|
||||||
|
|
||||||
The memory modifier causes pcre2test to log all memory allocation and
|
The memory modifier causes pcre2test to log all memory allocation and
|
||||||
freeing calls that occur during a match operation.
|
freeing calls that occur during a match operation.
|
||||||
|
|
||||||
Setting a starting offset
|
Setting a starting offset
|
||||||
|
|
||||||
The offset modifier sets an offset in the subject string at which
|
The offset modifier sets an offset in the subject string at which
|
||||||
matching starts. Its value is a number of code units, not characters.
|
matching starts. Its value is a number of code units, not characters.
|
||||||
|
|
||||||
Setting the size of the output vector
|
Setting the size of the output vector
|
||||||
|
|
||||||
The ovector modifier applies only to the subject line in which it
|
The ovector modifier applies only to the subject line in which it
|
||||||
appears, though of course it can also be used to set a default in a
|
appears, though of course it can also be used to set a default in a
|
||||||
#subject command. It specifies the number of pairs of offsets that are
|
#subject command. It specifies the number of pairs of offsets that are
|
||||||
available for storing matching information. The default is 15.
|
available for storing matching information. The default is 15.
|
||||||
|
|
||||||
A value of zero is useful when testing the POSIX API because it causes
|
A value of zero is useful when testing the POSIX API because it causes
|
||||||
regexec() to be called with a NULL capture vector. When not testing the
|
regexec() to be called with a NULL capture vector. When not testing the
|
||||||
POSIX API, a value of zero is used to cause pcre2_match_data_cre-
|
POSIX API, a value of zero is used to cause pcre2_match_data_cre-
|
||||||
ate_from_pattern to be called, in order to create a match block of
|
ate_from_pattern to be called, in order to create a match block of
|
||||||
exactly the right size for the pattern. (It is not possible to create a
|
exactly the right size for the pattern. (It is not possible to create a
|
||||||
match block with a zero-length ovector; there is always one pair of
|
match block with a zero-length ovector; there is always one pair of
|
||||||
offsets.)
|
offsets.)
|
||||||
|
|
||||||
|
Passing the subject as zero-terminated
|
||||||
|
|
||||||
|
By default, the subject string is passed to a native API matching func-
|
||||||
|
tion with its correct length. In order to test the facility for passing
|
||||||
|
a zero-terminated string, the zero_terminate modifier is provided. It
|
||||||
|
causes the length to be passed as PCRE2_ZERO_TERMINATED. (When matching
|
||||||
|
via the POSIX interface, this modifier has no effect, as there is no
|
||||||
|
facility for passing a length.)
|
||||||
|
|
||||||
|
When testing pcre2_substitute, this modifier also has the effect of
|
||||||
|
passing the replacement string as zero-terminated.
|
||||||
|
|
||||||
|
|
||||||
THE ALTERNATIVE MATCHING FUNCTION
|
THE ALTERNATIVE MATCHING FUNCTION
|
||||||
|
|
||||||
By default, pcre2test uses the standard PCRE2 matching function,
|
By default, pcre2test uses the standard PCRE2 matching function,
|
||||||
pcre2_match() to match each subject line. PCRE2 also supports an alter-
|
pcre2_match() to match each subject line. PCRE2 also supports an alter-
|
||||||
native matching function, pcre2_dfa_match(), which operates in a dif-
|
native matching function, pcre2_dfa_match(), which operates in a dif-
|
||||||
ferent way, and has some restrictions. The differences between the two
|
ferent way, and has some restrictions. The differences between the two
|
||||||
functions are described in the pcre2matching documentation.
|
functions are described in the pcre2matching documentation.
|
||||||
|
|
||||||
If the dfa modifier is set, the alternative matching function is used.
|
If the dfa modifier is set, the alternative matching function is used.
|
||||||
This function finds all possible matches at a given point in the sub-
|
This function finds all possible matches at a given point in the sub-
|
||||||
ject. If, however, the dfa_shortest modifier is set, processing stops
|
ject. If, however, the dfa_shortest modifier is set, processing stops
|
||||||
after the first match is found. This is always the shortest possible
|
after the first match is found. This is always the shortest possible
|
||||||
match.
|
match.
|
||||||
|
|
||||||
|
|
||||||
DEFAULT OUTPUT FROM pcre2test
|
DEFAULT OUTPUT FROM pcre2test
|
||||||
|
|
||||||
This section describes the output when the normal matching function,
|
This section describes the output when the normal matching function,
|
||||||
pcre2_match(), is being used.
|
pcre2_match(), is being used.
|
||||||
|
|
||||||
When a match succeeds, pcre2test outputs the list of captured sub-
|
When a match succeeds, pcre2test outputs the list of captured sub-
|
||||||
strings, starting with number 0 for the string that matched the whole
|
strings, starting with number 0 for the string that matched the whole
|
||||||
pattern. Otherwise, it outputs "No match" when the return is
|
pattern. Otherwise, it outputs "No match" when the return is
|
||||||
PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
|
PCRE2_ERROR_NOMATCH, or "Partial match:" followed by the partially
|
||||||
matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
|
matching substring when the return is PCRE2_ERROR_PARTIAL. (Note that
|
||||||
this is the entire substring that was inspected during the partial
|
this is the entire substring that was inspected during the partial
|
||||||
match; it may include characters before the actual match start if a
|
match; it may include characters before the actual match start if a
|
||||||
lookbehind assertion, \K, \b, or \B was involved.)
|
lookbehind assertion, \K, \b, or \B was involved.)
|
||||||
|
|
||||||
For any other return, pcre2test outputs the PCRE2 negative error number
|
For any other return, pcre2test outputs the PCRE2 negative error number
|
||||||
and a short descriptive phrase. If the error is a failed UTF string
|
and a short descriptive phrase. If the error is a failed UTF string
|
||||||
check, the offset of the start of the failing character and the reason
|
check, the offset of the start of the failing character and the reason
|
||||||
code are also output. Here is an example of an interactive pcre2test
|
code are also output. Here is an example of an interactive pcre2test
|
||||||
run.
|
run.
|
||||||
|
|
||||||
$ pcre2test
|
$ pcre2test
|
||||||
|
@ -908,10 +917,10 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
No match
|
No match
|
||||||
|
|
||||||
Unset capturing substrings that are not followed by one that is set are
|
Unset capturing substrings that are not followed by one that is set are
|
||||||
not returned by pcre2_match(), and are not shown by pcre2test. In the
|
not returned by pcre2_match(), and are not shown by pcre2test. In the
|
||||||
following example, there are two capturing substrings, but when the
|
following example, there are two capturing substrings, but when the
|
||||||
first data line is matched, the second, unset substring is not shown.
|
first data line is matched, the second, unset substring is not shown.
|
||||||
An "internal" unset substring is shown as "<unset>", as for the second
|
An "internal" unset substring is shown as "<unset>", as for the second
|
||||||
data line.
|
data line.
|
||||||
|
|
||||||
re> /(a)|(b)/
|
re> /(a)|(b)/
|
||||||
|
@ -923,11 +932,11 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
1: <unset>
|
1: <unset>
|
||||||
2: b
|
2: b
|
||||||
|
|
||||||
If the strings contain any non-printing characters, they are output as
|
If the strings contain any non-printing characters, they are output as
|
||||||
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
||||||
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
||||||
nition of non-printing characters. If the /aftertext modifier is set,
|
nition of non-printing characters. If the /aftertext modifier is set,
|
||||||
the output for substring 0 is followed by the rest of the subject
|
the output for substring 0 is followed by the rest of the subject
|
||||||
string, identified by "0+" like this:
|
string, identified by "0+" like this:
|
||||||
|
|
||||||
re> /cat/aftertext
|
re> /cat/aftertext
|
||||||
|
@ -935,7 +944,7 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
0: cat
|
0: cat
|
||||||
0+ aract
|
0+ aract
|
||||||
|
|
||||||
If global matching is requested, the results of successive matching
|
If global matching is requested, the results of successive matching
|
||||||
attempts are output in sequence, like this:
|
attempts are output in sequence, like this:
|
||||||
|
|
||||||
re> /\Bi(\w\w)/g
|
re> /\Bi(\w\w)/g
|
||||||
|
@ -947,8 +956,8 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
0: ipp
|
0: ipp
|
||||||
1: pp
|
1: pp
|
||||||
|
|
||||||
"No match" is output only if the first match attempt fails. Here is an
|
"No match" is output only if the first match attempt fails. Here is an
|
||||||
example of a failure message (the offset 4 that is specified by \>4 is
|
example of a failure message (the offset 4 that is specified by \>4 is
|
||||||
past the end of the subject string):
|
past the end of the subject string):
|
||||||
|
|
||||||
re> /xyz/
|
re> /xyz/
|
||||||
|
@ -956,7 +965,7 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
Error -24 (bad offset value)
|
Error -24 (bad offset value)
|
||||||
|
|
||||||
Note that whereas patterns can be continued over several lines (a plain
|
Note that whereas patterns can be continued over several lines (a plain
|
||||||
">" prompt is used for continuations), subject lines may not. However,
|
">" prompt is used for continuations), subject lines may not. However,
|
||||||
newlines can be included in a subject by means of the \n escape (or \r,
|
newlines can be included in a subject by means of the \n escape (or \r,
|
||||||
\r\n, etc., depending on the newline sequence setting).
|
\r\n, etc., depending on the newline sequence setting).
|
||||||
|
|
||||||
|
@ -964,7 +973,7 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
|
OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
|
||||||
|
|
||||||
When the alternative matching function, pcre2_dfa_match(), is used, the
|
When the alternative matching function, pcre2_dfa_match(), is used, the
|
||||||
output consists of a list of all the matches that start at the first
|
output consists of a list of all the matches that start at the first
|
||||||
point in the subject where there is at least one match. For example:
|
point in the subject where there is at least one match. For example:
|
||||||
|
|
||||||
re> /(tang|tangerine|tan)/
|
re> /(tang|tangerine|tan)/
|
||||||
|
@ -973,11 +982,11 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
|
||||||
1: tang
|
1: tang
|
||||||
2: tan
|
2: tan
|
||||||
|
|
||||||
(Using the normal matching function on this data finds only "tang".)
|
(Using the normal matching function on this data finds only "tang".)
|
||||||
The longest matching string is always given first (and numbered zero).
|
The longest matching string is always given first (and numbered zero).
|
||||||
After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
|
After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
|
||||||
followed by the partially matching substring. (Note that this is the
|
followed by the partially matching substring. (Note that this is the
|
||||||
entire substring that was inspected during the partial match; it may
|
entire substring that was inspected during the partial match; it may
|
||||||
include characters before the actual match start if a lookbehind asser-
|
include characters before the actual match start if a lookbehind asser-
|
||||||
tion, \K, \b, or \B was involved.)
|
tion, \K, \b, or \B was involved.)
|
||||||
|
|
||||||
|
@ -993,16 +1002,16 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
|
||||||
1: tan
|
1: tan
|
||||||
0: tan
|
0: tan
|
||||||
|
|
||||||
The alternative matching function does not support substring capture,
|
The alternative matching function does not support substring capture,
|
||||||
so the modifiers that are concerned with captured substrings are not
|
so the modifiers that are concerned with captured substrings are not
|
||||||
relevant.
|
relevant.
|
||||||
|
|
||||||
|
|
||||||
RESTARTING AFTER A PARTIAL MATCH
|
RESTARTING AFTER A PARTIAL MATCH
|
||||||
|
|
||||||
When the alternative matching function has given the PCRE2_ERROR_PAR-
|
When the alternative matching function has given the PCRE2_ERROR_PAR-
|
||||||
TIAL return, indicating that the subject partially matched the pattern,
|
TIAL return, indicating that the subject partially matched the pattern,
|
||||||
you can restart the match with additional subject data by means of the
|
you can restart the match with additional subject data by means of the
|
||||||
dfa_restart modifier. For example:
|
dfa_restart modifier. For example:
|
||||||
|
|
||||||
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
|
||||||
|
@ -1011,29 +1020,29 @@ RESTARTING AFTER A PARTIAL MATCH
|
||||||
data> n05\=dfa,dfa_restart
|
data> n05\=dfa,dfa_restart
|
||||||
0: n05
|
0: n05
|
||||||
|
|
||||||
For further information about partial matching, see the pcre2partial
|
For further information about partial matching, see the pcre2partial
|
||||||
documentation.
|
documentation.
|
||||||
|
|
||||||
|
|
||||||
CALLOUTS
|
CALLOUTS
|
||||||
|
|
||||||
If the pattern contains any callout requests, pcre2test's callout func-
|
If the pattern contains any callout requests, pcre2test's callout func-
|
||||||
tion is called during matching. This works with both matching func-
|
tion is called during matching. This works with both matching func-
|
||||||
tions. By default, the called function displays the callout number, the
|
tions. By default, the called function displays the callout number, the
|
||||||
start and current positions in the text at the callout time, and the
|
start and current positions in the text at the callout time, and the
|
||||||
next pattern item to be tested. For example:
|
next pattern item to be tested. For example:
|
||||||
|
|
||||||
--->pqrabcdef
|
--->pqrabcdef
|
||||||
0 ^ ^ \d
|
0 ^ ^ \d
|
||||||
|
|
||||||
This output indicates that callout number 0 occurred for a match
|
This output indicates that callout number 0 occurred for a match
|
||||||
attempt starting at the fourth character of the subject string, when
|
attempt starting at the fourth character of the subject string, when
|
||||||
the pointer was at the seventh character, and when the next pattern
|
the pointer was at the seventh character, and when the next pattern
|
||||||
item was \d. Just one circumflex is output if the start and current
|
item was \d. Just one circumflex is output if the start and current
|
||||||
positions are the same.
|
positions are the same.
|
||||||
|
|
||||||
Callouts numbered 255 are assumed to be automatic callouts, inserted as
|
Callouts numbered 255 are assumed to be automatic callouts, inserted as
|
||||||
a result of the /auto_callout pattern modifier. In this case, instead
|
a result of the /auto_callout pattern modifier. In this case, instead
|
||||||
of showing the callout number, the offset in the pattern, preceded by a
|
of showing the callout number, the offset in the pattern, preceded by a
|
||||||
plus, is output. For example:
|
plus, is output. For example:
|
||||||
|
|
||||||
|
@ -1047,7 +1056,7 @@ CALLOUTS
|
||||||
0: E*
|
0: E*
|
||||||
|
|
||||||
If a pattern contains (*MARK) items, an additional line is output when-
|
If a pattern contains (*MARK) items, an additional line is output when-
|
||||||
ever a change of latest mark is passed to the callout function. For
|
ever a change of latest mark is passed to the callout function. For
|
||||||
example:
|
example:
|
||||||
|
|
||||||
re> /a(*MARK:X)bc/auto_callout
|
re> /a(*MARK:X)bc/auto_callout
|
||||||
|
@ -1061,30 +1070,30 @@ CALLOUTS
|
||||||
+12 ^ ^
|
+12 ^ ^
|
||||||
0: abc
|
0: abc
|
||||||
|
|
||||||
The mark changes between matching "a" and "b", but stays the same for
|
The mark changes between matching "a" and "b", but stays the same for
|
||||||
the rest of the match, so nothing more is output. If, as a result of
|
the rest of the match, so nothing more is output. If, as a result of
|
||||||
backtracking, the mark reverts to being unset, the text "<unset>" is
|
backtracking, the mark reverts to being unset, the text "<unset>" is
|
||||||
output.
|
output.
|
||||||
|
|
||||||
The callout function in pcre2test returns zero (carry on matching) by
|
The callout function in pcre2test returns zero (carry on matching) by
|
||||||
default, but you can use a callout_fail modifier in a subject line (as
|
default, but you can use a callout_fail modifier in a subject line (as
|
||||||
described above) to change this and other parameters of the callout.
|
described above) to change this and other parameters of the callout.
|
||||||
|
|
||||||
Inserting callouts can be helpful when using pcre2test to check compli-
|
Inserting callouts can be helpful when using pcre2test to check compli-
|
||||||
cated regular expressions. For further information about callouts, see
|
cated regular expressions. For further information about callouts, see
|
||||||
the pcre2callout documentation.
|
the pcre2callout documentation.
|
||||||
|
|
||||||
|
|
||||||
NON-PRINTING CHARACTERS
|
NON-PRINTING CHARACTERS
|
||||||
|
|
||||||
When pcre2test is outputting text in the compiled version of a pattern,
|
When pcre2test is outputting text in the compiled version of a pattern,
|
||||||
bytes other than 32-126 are always treated as non-printing characters
|
bytes other than 32-126 are always treated as non-printing characters
|
||||||
and are therefore shown as hex escapes.
|
and are therefore shown as hex escapes.
|
||||||
|
|
||||||
When pcre2test is outputting text that is a matched part of a subject
|
When pcre2test is outputting text that is a matched part of a subject
|
||||||
string, it behaves in the same way, unless a different locale has been
|
string, it behaves in the same way, unless a different locale has been
|
||||||
set for the pattern (using the /locale modifier). In this case, the
|
set for the pattern (using the /locale modifier). In this case, the
|
||||||
isprint() function is used to distinguish printing and non-printing
|
isprint() function is used to distinguish printing and non-printing
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
|
|
||||||
|
@ -1103,5 +1112,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 02 November 2014
|
Last updated: 09 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
|
|
|
@ -67,8 +67,8 @@ Arguments:
|
||||||
buffer where to put the substituted string
|
buffer where to put the substituted string
|
||||||
blength points to length of buffer; updated to length of string
|
blength points to length of buffer; updated to length of string
|
||||||
|
|
||||||
Returns: > 0 number of substitutions made
|
Returns: >= 0 number of substitutions made
|
||||||
< 0 an error code, including PCRE2_ERROR_NOMATCH if no match
|
< 0 an error code
|
||||||
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
PCRE2_ERROR_BADREPLACEMENT means invalid use of $
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -78,8 +78,8 @@ pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
|
||||||
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
|
||||||
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
|
||||||
{
|
{
|
||||||
int rc = 0;
|
int rc;
|
||||||
int subs = 0;
|
int subs;
|
||||||
uint32_t ovector_count;
|
uint32_t ovector_count;
|
||||||
uint32_t goptions = 0;
|
uint32_t goptions = 0;
|
||||||
BOOL match_data_created = FALSE;
|
BOOL match_data_created = FALSE;
|
||||||
|
@ -106,6 +106,21 @@ if (match_data == NULL)
|
||||||
ovector = pcre2_get_ovector_pointer(match_data);
|
ovector = pcre2_get_ovector_pointer(match_data);
|
||||||
ovector_count = pcre2_get_ovector_count(match_data);
|
ovector_count = pcre2_get_ovector_count(match_data);
|
||||||
|
|
||||||
|
/* Check UTF replacement string if necessary. */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
|
if ((code->overall_options & PCRE2_UTF) != 0 &&
|
||||||
|
(options & PCRE2_NO_UTF_CHECK) == 0)
|
||||||
|
{
|
||||||
|
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||||
|
if (rc != 0)
|
||||||
|
{
|
||||||
|
match_data->leftchar = 0;
|
||||||
|
goto EXIT;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Notice the global option and remove it from the options that are passed to
|
/* Notice the global option and remove it from the options that are passed to
|
||||||
pcre2_match(). */
|
pcre2_match(). */
|
||||||
|
|
||||||
|
@ -129,6 +144,7 @@ lengthleft = *blength - start_offset;
|
||||||
|
|
||||||
/* Loop for global substituting. */
|
/* Loop for global substituting. */
|
||||||
|
|
||||||
|
subs = 0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
PCRE2_SIZE i;
|
PCRE2_SIZE i;
|
||||||
|
@ -273,6 +289,7 @@ buffer[buff_offset] = 0;
|
||||||
|
|
||||||
EXIT:
|
EXIT:
|
||||||
if (match_data_created) pcre2_match_data_free(match_data);
|
if (match_data_created) pcre2_match_data_free(match_data);
|
||||||
|
else match_data->rc = rc;
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
NOROOM:
|
NOROOM:
|
||||||
|
|
|
@ -1547,7 +1547,7 @@ the three different cases. */
|
||||||
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
|
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_16(G(b,16))
|
||||||
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
|
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_16(G(a,16),b)
|
||||||
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
|
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_16(G(a,16))
|
||||||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
|
||||||
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
|
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16))
|
||||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
|
||||||
|
@ -1628,7 +1628,7 @@ the three different cases. */
|
||||||
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
|
#define PCRE2_GET_STARTCHAR(a,b) a = pcre2_get_startchar_32(G(b,32))
|
||||||
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
|
#define PCRE2_JIT_COMPILE(a,b) pcre2_jit_compile_32(G(a,32),b)
|
||||||
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
|
#define PCRE2_JIT_FREE_UNUSED_MEMORY(a) pcre2_jit_free_unused_memory_32(G(a,32))
|
||||||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h) \
|
||||||
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32))
|
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32))
|
||||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
||||||
|
|
Loading…
Reference in New Issue