Changed jit_stack_alloc to jit_stack_create.
This commit is contained in:
parent
dea68c01fb
commit
08e3107cbe
|
@ -42,8 +42,8 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2_jit_compile.html \
|
doc/html/pcre2_jit_compile.html \
|
||||||
doc/html/pcre2_jit_free_unused_memory.html \
|
doc/html/pcre2_jit_free_unused_memory.html \
|
||||||
doc/html/pcre2_jit_match.html \
|
doc/html/pcre2_jit_match.html \
|
||||||
doc/html/pcre2_jit_stack_alloc.html \
|
|
||||||
doc/html/pcre2_jit_stack_assign.html \
|
doc/html/pcre2_jit_stack_assign.html \
|
||||||
|
doc/html/pcre2_jit_stack_create.html \
|
||||||
doc/html/pcre2_jit_stack_free.html \
|
doc/html/pcre2_jit_stack_free.html \
|
||||||
doc/html/pcre2_maketables.html \
|
doc/html/pcre2_maketables.html \
|
||||||
doc/html/pcre2_match.html \
|
doc/html/pcre2_match.html \
|
||||||
|
@ -113,8 +113,8 @@ dist_man_MANS = \
|
||||||
doc/pcre2_jit_compile.3 \
|
doc/pcre2_jit_compile.3 \
|
||||||
doc/pcre2_jit_free_unused_memory.3 \
|
doc/pcre2_jit_free_unused_memory.3 \
|
||||||
doc/pcre2_jit_match.3 \
|
doc/pcre2_jit_match.3 \
|
||||||
doc/pcre2_jit_stack_alloc.3 \
|
|
||||||
doc/pcre2_jit_stack_assign.3 \
|
doc/pcre2_jit_stack_assign.3 \
|
||||||
|
doc/pcre2_jit_stack_create.3 \
|
||||||
doc/pcre2_jit_stack_free.3 \
|
doc/pcre2_jit_stack_free.3 \
|
||||||
doc/pcre2_maketables.3 \
|
doc/pcre2_maketables.3 \
|
||||||
doc/pcre2_match.3 \
|
doc/pcre2_match.3 \
|
||||||
|
|
|
@ -82,8 +82,8 @@ checkspecial()
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# ------ Normal tests ------
|
# ------ Normal tests ------
|
||||||
|
|
||||||
echo "Testing pcre2grep main features"
|
echo "Testing pcre2grep main features"
|
||||||
|
|
||||||
|
@ -585,7 +585,7 @@ $cf $srcdir/testdata/grepoutputN testtrygrep
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
|
|
||||||
|
|
||||||
# Finally, some tests to exercise code that is not tested above, just to be
|
# Finally, some tests to exercise code that is not tested above, just to be
|
||||||
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||||
# is not checked.
|
# is not checked.
|
||||||
|
|
||||||
|
|
|
@ -574,7 +574,7 @@ a list of tests.
|
||||||
|
|
||||||
The first two tests can always be run, as they expect only plain text strings
|
The first two tests can always be run, as they expect only plain text strings
|
||||||
(not UTF) and make no use of Unicode properties. The first test file can be fed
|
(not UTF) and make no use of Unicode properties. The first test file can be fed
|
||||||
directly into the perltest.pl script to check that Perl gives the same results.
|
directly into the perltest.sh script to check that Perl gives the same results.
|
||||||
The only difference you should see is in the first few lines, where the Perl
|
The only difference you should see is in the first few lines, where the Perl
|
||||||
version is given instead of the PCRE2 version. The second set of tests check
|
version is given instead of the PCRE2 version. The second set of tests check
|
||||||
auxiliary functions, error detection, and run-time flags that are specific to
|
auxiliary functions, error detection, and run-time flags that are specific to
|
||||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
||||||
matches any one of them.
|
matches any one of them.
|
||||||
|
|
||||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||||
being compatible with the perltest.pl script, and the fifth checking
|
being compatible with the perltest.sh script, and the fifth checking
|
||||||
PCRE2-specific things.
|
PCRE2-specific things.
|
||||||
|
|
||||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||||
|
@ -781,7 +781,6 @@ The distribution should contain the files listed below.
|
||||||
doc/html/* HTML documentation
|
doc/html/* HTML documentation
|
||||||
doc/pcre2.txt plain text version of the man pages
|
doc/pcre2.txt plain text version of the man pages
|
||||||
doc/pcre2test.txt plain text documentation of test program
|
doc/pcre2test.txt plain text documentation of test program
|
||||||
doc/perltest.txt plain text documentation of Perl test program
|
|
||||||
install-sh a shell script for installing files
|
install-sh a shell script for installing files
|
||||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||||
|
@ -791,7 +790,7 @@ The distribution should contain the files listed below.
|
||||||
missing ) common stub for a few missing GNU programs while
|
missing ) common stub for a few missing GNU programs while
|
||||||
) installing, generated by automake
|
) installing, generated by automake
|
||||||
mkinstalldirs script for making install directories
|
mkinstalldirs script for making install directories
|
||||||
perltest.pl Perl test program
|
perltest.sh Script for running a Perl test program
|
||||||
pcre2-config.in source of script which retains PCRE2 information
|
pcre2-config.in source of script which retains PCRE2 information
|
||||||
pcre2_jit_test.c test program for the JIT compiler
|
pcre2_jit_test.c test program for the JIT compiler
|
||||||
testdata/testinput* test data for main library tests
|
testdata/testinput* test data for main library tests
|
||||||
|
@ -829,4 +828,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 27 October 2014
|
Last updated: 25 October 2014
|
||||||
|
|
|
@ -140,12 +140,12 @@ in the library.
|
||||||
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
||||||
<td> Fast path interface to JIT matching</td></tr>
|
<td> Fast path interface to JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
|
|
||||||
<td> Create a stack for JIT matching</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
||||||
<td> Assign stack for JIT matching</td></tr>
|
<td> Assign stack for JIT matching</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
|
||||||
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
||||||
<td> Free a JIT matching stack</td></tr>
|
<td> Free a JIT matching stack</td></tr>
|
||||||
|
|
||||||
|
@ -162,7 +162,7 @@ in the library.
|
||||||
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
|
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
|
||||||
<td> Create a match context</td></tr>
|
<td> Create a match context</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_contest_free</a></td>
|
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_context_free</a></td>
|
||||||
<td> Free a match context</td></tr>
|
<td> Free a match context</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
|
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
|
||||||
|
|
|
@ -27,9 +27,11 @@ DESCRIPTION
|
||||||
<P>
|
<P>
|
||||||
After a successful call of <b>pcre2_match()</b> that was passed the match block
|
After a successful call of <b>pcre2_match()</b> that was passed the match block
|
||||||
that is this function's argument, this function returns the code unit offset of
|
that is this function's argument, this function returns the code unit offset of
|
||||||
the character at which the successful match started. This can be different to
|
the character at which the successful match started. For a non-partial match,
|
||||||
the value of <i>ovector[0]</i> if the pattern contains the \K escape sequence.
|
this can be different to the value of <i>ovector[0]</i> if the pattern contains
|
||||||
Note, however, that \K has no effect for a partial match.
|
the \K escape sequence. After a partial match, however, this value is always
|
||||||
|
the same as <i>ovector[0]</i> because \K does not affect the result of a
|
||||||
|
partial match.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -27,7 +27,10 @@ DESCRIPTION
|
||||||
<P>
|
<P>
|
||||||
This function frees unused JIT executable memory. The argument is a general
|
This function frees unused JIT executable memory. The argument is a general
|
||||||
context, for custom memory management, or NULL for standard memory management.
|
context, for custom memory management, or NULL for standard memory management.
|
||||||
FIXME: more detail needed.
|
JIT memory allocation retains some memory in order to improve future JIT
|
||||||
|
compilation speed. In low memory conditions,
|
||||||
|
<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
|
||||||
|
freed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -36,7 +36,7 @@ Its arguments are exactly the same as for
|
||||||
<a href="pcre2_match.html"><b>pcre2_match()</b></a>
|
<a href="pcre2_match.html"><b>pcre2_match()</b></a>
|
||||||
plus one additional argument that must either point to a JIT stack or be NULL.
|
plus one additional argument that must either point to a JIT stack or be NULL.
|
||||||
In the latter case, if a callback function has been set up by
|
In the latter case, if a callback function has been set up by
|
||||||
<b>pcre2_jit_stack_alloc()</b>, it is called. Otherwise the system stack is
|
<b>pcre2_jit_stack_assign()</b>, it is called. Otherwise the system stack is
|
||||||
used.
|
used.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -42,13 +42,13 @@ block on the machine stack is used.
|
||||||
<P>
|
<P>
|
||||||
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
|
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
|
||||||
<i>callback_data</i> must be a valid JIT stack, the result of calling
|
<i>callback_data</i> must be a valid JIT stack, the result of calling
|
||||||
<b>pcre2_jit_stack_alloc()</b>.
|
<b>pcre2_jit_stack_create()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
|
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
|
||||||
argument at the start of matching, in order to set up a JIT stack. If the
|
argument at the start of matching, in order to set up a JIT stack. If the
|
||||||
result is NULL, the internal 32K stack is used; otherwise the return value must
|
result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||||
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_alloc()</b>.
|
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You may safely assign the same JIT stack to multiple patterns, as long as they
|
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
<html>
|
<html>
|
||||||
<head>
|
<head>
|
||||||
<title>pcre2_jit_stack_alloc specification</title>
|
<title>pcre2_jit_stack_create specification</title>
|
||||||
</head>
|
</head>
|
||||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
<h1>pcre2_jit_stack_alloc man page</h1>
|
<h1>pcre2_jit_stack_create man page</h1>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
</p>
|
</p>
|
||||||
|
@ -19,7 +19,7 @@ SYNOPSIS
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
|
@ -26,7 +26,7 @@ DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function is used to free a JIT stack that was created by
|
This function is used to free a JIT stack that was created by
|
||||||
<b>pcre2_jit_stack_alloc()</b> when it is no longer needed. For more details,
|
<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
|
||||||
see the
|
see the
|
||||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||||
page.
|
page.
|
||||||
|
|
|
@ -27,16 +27,17 @@ DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This convenience function finds the number of a named substring capturing
|
This convenience function finds the number of a named substring capturing
|
||||||
parenthesis in a compiled pattern. Its arguments are:
|
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||||
|
function arguments are:
|
||||||
<pre>
|
<pre>
|
||||||
<i>code</i> Compiled regular expression
|
<i>code</i> Compiled regular expression
|
||||||
<i>name</i> Name whose number is required
|
<i>name</i> Name whose number is required
|
||||||
</pre>
|
</pre>
|
||||||
The yield of the function is the number of the parenthesis if the name is
|
The yield of the function is the number of the parenthesis if the name is
|
||||||
found, or PCRE2_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are
|
||||||
(PCRE2_DUPNAMES is set), it is not defined which of the numbers is returned.
|
allowed (PCRE2_DUPNAMES is set), if the name is not unique,
|
||||||
You can obtain the complete list by calling
|
PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers
|
||||||
<b>pcre2_substring_nametable_scan()</b>.
|
with the same name by calling <b>pcre2_substring_nametable_scan()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -236,7 +236,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -363,7 +363,7 @@ support is not available.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
More complicated programs might need to make use of the specialist functions
|
More complicated programs might need to make use of the specialist functions
|
||||||
<b>pcre2_jit_stack_alloc()</b>, <b>pcre2_jit_stack_free()</b>, and
|
<b>pcre2_jit_stack_create()</b>, <b>pcre2_jit_stack_free()</b>, and
|
||||||
<b>pcre2_jit_stack_assign()</b> in order to control the JIT code's memory usage.
|
<b>pcre2_jit_stack_assign()</b> in order to control the JIT code's memory usage.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1272,7 +1272,7 @@ textual error message from any error code.
|
||||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2050,10 +2050,11 @@ Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||||
match or a partial match, as well as after a successful one.
|
match or a partial match, as well as after a successful one.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The offset of the character at which the successful match started is
|
The code unit offset of the character at which a successful match started is
|
||||||
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
|
returned by <b>pcre2_get_startchar()</b>. For a non-partial match, this can be
|
||||||
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
different to the value of <i>ovector[0]</i> if the pattern contains the \K
|
||||||
however, that \K has no effect for a partial match.
|
escape sequence. After a partial match, however, this value is always the same
|
||||||
|
as <i>ovector[0]</i> because \K does not affect the result of a partial match.
|
||||||
<a name="errorlist"></a></P>
|
<a name="errorlist"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Error return values from <b>pcre2_match()</b>
|
Error return values from <b>pcre2_match()</b>
|
||||||
|
@ -2302,8 +2303,9 @@ the number of the subpattern called "xxx" is 2. If the name is known to be
|
||||||
unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
|
unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
|
||||||
calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
||||||
compiled pattern, and the second is the name. The yield of the function is the
|
compiled pattern, and the second is the name. The yield of the function is the
|
||||||
subpattern number, or PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||||
name.
|
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||||
|
that name.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Given the number, you can extract the substring directly, or use one of the
|
Given the number, you can extract the substring directly, or use one of the
|
||||||
|
@ -2577,7 +2579,7 @@ Cambridge CB2 3QH, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 16 October 2014
|
Last updated: 03 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -20,17 +20,15 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
|
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
|
||||||
<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
|
<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
|
||||||
<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
|
<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
|
||||||
<li><a name="TOC8" href="#SEC8">EXAMPLE CODE</a>
|
<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
|
||||||
<li><a name="TOC9" href="#SEC9">JIT FAST PATH API</a>
|
<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
|
||||||
<li><a name="TOC10" href="#SEC10">SEE ALSO</a>
|
<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
|
||||||
<li><a name="TOC11" href="#SEC11">AUTHOR</a>
|
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
|
||||||
<li><a name="TOC12" href="#SEC12">REVISION</a>
|
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
|
||||||
|
<li><a name="TOC13" href="#SEC13">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
|
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
FIXME: This needs checking over once JIT support is implemented.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Just-in-time compiling is a heavyweight optimization that can greatly speed up
|
Just-in-time compiling is a heavyweight optimization that can greatly speed up
|
||||||
pattern matching. However, it comes at the cost of extra processing before the
|
pattern matching. However, it comes at the cost of extra processing before the
|
||||||
match is performed. Therefore, it is of most benefit when the same pattern is
|
match is performed. Therefore, it is of most benefit when the same pattern is
|
||||||
|
@ -79,9 +77,12 @@ second is a set of option bits, which must include at least one of
|
||||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The returned value from <b>pcre2_jit_compile()</b> is zero on success, or a
|
If JIT support is not available, a call to <b>pcre2_jit_compile()</b> does
|
||||||
negative error code. In particular, PCRE2_ERROR_JIT_BADOPTION is returned if
|
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern
|
||||||
JIT is not supported or if an unknown options bit is set.
|
is passed to the JIT compiler, which turns it into machine code that executes
|
||||||
|
much faster than the normal interpretive code, but yields exactly the same
|
||||||
|
results. The returned value from <b>pcre2_jit_compile()</b> is zero on success,
|
||||||
|
or a negative error code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
|
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
|
||||||
|
@ -100,12 +101,6 @@ described in the section entitled
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If JIT support is not available, a call to <b>pcre2_jit_comple()</b> does
|
|
||||||
nothing and returns FIXME. Otherwise, the compiled pattern is passed to the JIT
|
|
||||||
compiler, which turns it into machine code that executes much faster than the
|
|
||||||
normal interpretive code, but yields exactly the same results.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
There are some <b>pcre2_match()</b> options that are not supported by JIT, and
|
There are some <b>pcre2_match()</b> options that are not supported by JIT, and
|
||||||
there are also some pattern items that JIT cannot handle. Details are given
|
there are also some pattern items that JIT cannot handle. Details are given
|
||||||
below. In both cases, matching automatically falls back to the interpretive
|
below. In both cases, matching automatically falls back to the interpretive
|
||||||
|
@ -166,7 +161,7 @@ about the use of JIT stacks in the section entitled
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_jit_stack_alloc()</b> function creates a JIT stack. Its arguments
|
The <b>pcre2_jit_stack_create()</b> function creates a JIT stack. Its arguments
|
||||||
are a general context (for memory allocation functions, or NULL for standard
|
are a general context (for memory allocation functions, or NULL for standard
|
||||||
memory allocation), a starting size and a maximum size, and it returns a
|
memory allocation), a starting size and a maximum size, and it returns a
|
||||||
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
|
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
|
||||||
|
@ -195,14 +190,14 @@ the other two options:
|
||||||
on the machine stack is used.
|
on the machine stack is used.
|
||||||
|
|
||||||
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
||||||
a valid JIT stack, the result of calling <b>pcre2_jit_stack_alloc()</b>.
|
a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||||
|
|
||||||
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
||||||
called with <i>data</i> as an argument at the start of matching, in
|
called with <i>data</i> as an argument at the start of matching, in
|
||||||
order to set up a JIT stack. If the return from the callback
|
order to set up a JIT stack. If the return from the callback
|
||||||
function is NULL, the internal 32K stack is used; otherwise the
|
function is NULL, the internal 32K stack is used; otherwise the
|
||||||
return value must be a valid JIT stack, the result of calling
|
return value must be a valid JIT stack, the result of calling
|
||||||
<b>pcre2_jit_stack_alloc()</b>.
|
<b>pcre2_jit_stack_create()</b>.
|
||||||
</pre>
|
</pre>
|
||||||
A callback function is obeyed whenever JIT code is about to be run; it is not
|
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||||
obeyed when <b>pcre2_match()</b> is called with options that are incompatible
|
obeyed when <b>pcre2_match()</b> is called with options that are incompatible
|
||||||
|
@ -231,7 +226,7 @@ This is a suggestion for how a multithreaded program that needs to set up
|
||||||
non-default JIT stacks might operate:
|
non-default JIT stacks might operate:
|
||||||
<pre>
|
<pre>
|
||||||
During thread initialization
|
During thread initialization
|
||||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
thread_local_var = pcre2_jit_stack_create(...)
|
||||||
|
|
||||||
During thread exit
|
During thread exit
|
||||||
pcre2_jit_stack_free(thread_local_var)
|
pcre2_jit_stack_free(thread_local_var)
|
||||||
|
@ -323,7 +318,19 @@ stack handling?
|
||||||
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
|
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
|
||||||
out this complicated API.
|
out this complicated API.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">EXAMPLE CODE</a><br>
|
<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
|
||||||
|
<P>
|
||||||
|
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The JIT executable allocator does not free all memory when it is possible to do so.
|
||||||
|
It expects new allocations, and keeps some free memory around to improve
|
||||||
|
allocation speed. However, in low memory conditions, it might be better to free
|
||||||
|
all possible memory. You can cause this to happen by calling
|
||||||
|
pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
|
||||||
|
memory management, or NULL for standard memory management.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
|
||||||
<P>
|
<P>
|
||||||
This is a single-threaded example that specifies a JIT stack without using a
|
This is a single-threaded example that specifies a JIT stack without using a
|
||||||
callback.
|
callback.
|
||||||
|
@ -338,7 +345,7 @@ callback.
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
/* Check for error (NULL) */
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
|
@ -349,7 +356,7 @@ callback.
|
||||||
|
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC9" href="#TOC1">JIT FAST PATH API</a><br>
|
<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
|
||||||
<P>
|
<P>
|
||||||
Because the API described above falls back to interpreted matching when JIT is
|
Because the API described above falls back to interpreted matching when JIT is
|
||||||
not available, it is convenient for programs that are written for general use
|
not available, it is convenient for programs that are written for general use
|
||||||
|
@ -364,11 +371,11 @@ processed by <b>pcre2_jit_compile()</b>).
|
||||||
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
||||||
the same arguments as <b>pcre2_match()</b>, plus one additional argument that
|
the same arguments as <b>pcre2_match()</b>, plus one additional argument that
|
||||||
must either point to a JIT stack or be NULL. In the latter case, if a callback
|
must either point to a JIT stack or be NULL. In the latter case, if a callback
|
||||||
function has been set up by <b>pcre2_jit_stack_alloc()</b>, it is called.
|
function has been set up by <b>pcre2_jit_stack_assign()</b>, it is called.
|
||||||
Otherwise the system stack is used. The return values are the same as for
|
Otherwise the system stack is used. The return values are the same as for
|
||||||
<b>pcre2_match()</b>, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
<b>pcre2_match()</b>, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
||||||
or complete) is requested that was not compiled. Unsupported option bits are
|
or complete) is requested that was not compiled. Unsupported option bits (for
|
||||||
ignored.
|
example, PCRE2_ANCHORED) are ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
||||||
|
@ -382,11 +389,11 @@ invalid data is passed, the result is undefined.
|
||||||
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
|
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
|
||||||
speedups of more than 10%.
|
speedups of more than 10%.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2api</b>(3)
|
<b>pcre2api</b>(3)
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC11" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel (FAQ by Zoltan Herczeg)
|
Philip Hazel (FAQ by Zoltan Herczeg)
|
||||||
<br>
|
<br>
|
||||||
|
@ -395,9 +402,9 @@ University Computing Service
|
||||||
Cambridge CB2 3QH, England.
|
Cambridge CB2 3QH, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 21 October 2014
|
Last updated: 03 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -223,10 +223,12 @@ Output the PCRE2 version number and then exit.
|
||||||
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcre2test</b> is given two filename arguments, it reads from the first and
|
If <b>pcre2test</b> is given two filename arguments, it reads from the first and
|
||||||
writes to the second. If it is given only one filename argument, it reads from
|
writes to the second. If the first name is "-", input is taken from the
|
||||||
|
standard input. If <b>pcre2test</b> is given only one argument, it reads from
|
||||||
that file and writes to stdout. Otherwise, it reads from stdin and writes to
|
that file and writes to stdout. Otherwise, it reads from stdin and writes to
|
||||||
stdout, and prompts for each line of input, using "re>" to prompt for regular
|
stdout. When the input is a terminal, it prompts for each line of input, using
|
||||||
expression patterns, and "data>" to prompt for subject lines.
|
"re>" to prompt for regular expression patterns, and "data>" to prompt for
|
||||||
|
subject lines.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When <b>pcre2test</b> is built, a configuration option can specify that it
|
When <b>pcre2test</b> is built, a configuration option can specify that it
|
||||||
|
@ -476,6 +478,7 @@ about the pattern:
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
hex pattern is coded in hexadecimal
|
hex pattern is coded in hexadecimal
|
||||||
jit[=<number>] use JIT
|
jit[=<number>] use JIT
|
||||||
|
jitfast use JIT fast path
|
||||||
jitverify verify JIT use
|
jitverify verify JIT use
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
|
@ -573,6 +576,13 @@ documentation. See also the <b>jitstack</b> modifier below for a way of
|
||||||
setting the size of the JIT stack.
|
setting the size of the JIT stack.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
If the <b>jitfast</b> modifier is specified, matching is done using the JIT
|
||||||
|
"fast path" interface (<b>pcre2_jit_match()</b>), which skips some of the sanity
|
||||||
|
checks that are done by <b>pcre2_match()</b>, and of course does not work when
|
||||||
|
JIT is not supported. If <b>jitfast</b> is specified without <b>jit</b>, jit=7 is
|
||||||
|
assumed.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
If the <b>jitverify</b> modifier is specified, information about the compiled
|
If the <b>jitverify</b> modifier is specified, information about the compiled
|
||||||
pattern shows whether JIT compilation was or was not successful. If
|
pattern shows whether JIT compilation was or was not successful. If
|
||||||
<b>jitverify</b> is specified without <b>jit</b>, jit=7 is assumed. If JIT
|
<b>jitverify</b> is specified without <b>jit</b>, jit=7 is assumed. If JIT
|
||||||
|
@ -612,6 +622,9 @@ Limiting nested parentheses
|
||||||
<P>
|
<P>
|
||||||
The <b>parens_nest_limit</b> modifier sets a limit on the depth of nested
|
The <b>parens_nest_limit</b> modifier sets a limit on the depth of nested
|
||||||
parentheses in a pattern. Breaching the limit causes a compilation error.
|
parentheses in a pattern. Breaching the limit causes a compilation error.
|
||||||
|
The default for the library is set when PCRE2 is built, but <b>pcre2test</b>
|
||||||
|
sets its own default of 220, which is required for running the standard test
|
||||||
|
suite.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Using the POSIX wrapper API
|
Using the POSIX wrapper API
|
||||||
|
@ -675,12 +688,13 @@ However, they may be included in a pattern's modifier list, in which case they
|
||||||
are applied to every subject line that is processed with that pattern. They do
|
are applied to every subject line that is processed with that pattern. They do
|
||||||
not affect the compilation process.
|
not affect the compilation process.
|
||||||
<pre>
|
<pre>
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
/g global global matching
|
/g global global matching
|
||||||
mark show mark values
|
mark show mark values
|
||||||
|
startchar show starting character when relevant
|
||||||
</pre>
|
</pre>
|
||||||
These modifiers may not appear in a <b>#pattern</b> command. If you want them as
|
These modifiers may not appear in a <b>#pattern</b> command. If you want them as
|
||||||
defaults, set them in a <b>#subject</b> command.
|
defaults, set them in a <b>#subject</b> command.
|
||||||
|
@ -751,6 +765,7 @@ pattern.
|
||||||
offset=<n> set starting offset
|
offset=<n> set starting offset
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
|
startchar show startchar when relevant
|
||||||
</pre>
|
</pre>
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
@ -777,14 +792,30 @@ there is a lookbehind at the start of a match, or a lookahead at the end, or if
|
||||||
of the actual match are indicated in the output by '<' or '>' characters
|
of the actual match are indicated in the output by '<' or '>' characters
|
||||||
underneath them. Here is an example:
|
underneath them. Here is an example:
|
||||||
<pre>
|
<pre>
|
||||||
/(?<=pqr)abc(?=xyz)/
|
re> /(?<=pqr)abc(?=xyz)/
|
||||||
123pqrabcxyz456\=allusedtext
|
data> 123pqrabcxyz456\=allusedtext
|
||||||
0: pqrabcxyz
|
0: pqrabcxyz
|
||||||
<<< >>>
|
<<< >>>
|
||||||
</pre>
|
</pre>
|
||||||
This shows that the matched string is "abc", with the preceding and following
|
This shows that the matched string is "abc", with the preceding and following
|
||||||
strings "pqr" and "xyz" also consulted during the match.
|
strings "pqr" and "xyz" also consulted during the match.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
The <b>startchar</b> modifier requests that the starting character for the match
|
||||||
|
be indicated, if it is different to the start of the matched string. The only
|
||||||
|
time when this occurs is when \K has been processed as part of the match. In
|
||||||
|
this situation, the output for the matched string is displayed from the
|
||||||
|
starting character instead of from the match point, with circumflex characters
|
||||||
|
under the earlier characters. For example:
|
||||||
|
<pre>
|
||||||
|
re> /abc\Kxyz/
|
||||||
|
data> abcxyz\=startchar
|
||||||
|
0: abcxyz
|
||||||
|
^^^
|
||||||
|
</pre>
|
||||||
|
Unlike <b>allusedtext</b>, the <b>startchar</b> modifier can be used with JIT.
|
||||||
|
However, these two modifiers are mutually exclusive.
|
||||||
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Showing the value of all capture groups
|
Showing the value of all capture groups
|
||||||
</b><br>
|
</b><br>
|
||||||
|
@ -870,8 +901,9 @@ Setting the JIT stack size
|
||||||
<P>
|
<P>
|
||||||
The <b>jitstack</b> modifier provides a way of setting the maximum stack size
|
The <b>jitstack</b> modifier provides a way of setting the maximum stack size
|
||||||
that is used by the just-in-time optimization code. It is ignored if JIT
|
that is used by the just-in-time optimization code. It is ignored if JIT
|
||||||
optimization is not being used. Providing a stack that is larger than the
|
optimization is not being used. The value is a number of kilobytes. Providing a
|
||||||
default 32K is necessary only for very complicated patterns.
|
stack that is larger than the default 32K is necessary only for very
|
||||||
|
complicated patterns.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Setting match and recursion limits
|
Setting match and recursion limits
|
||||||
|
@ -939,11 +971,13 @@ appears, though of course it can also be used to set a default in a
|
||||||
available for storing matching information. The default is 15.
|
available for storing matching information. The default is 15.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
At least one pair of offsets is always created by
|
A value of zero is useful when testing the POSIX API because it causes
|
||||||
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
<b>regexec()</b> to be called with a NULL capture vector. When not testing the
|
||||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
POSIX API, a value of zero is used to cause
|
||||||
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
<b>pcre2_match_data_create_from_pattern()</b> to be called, in order to create a
|
||||||
vector.
|
match block of exactly the right size for the pattern. (It is not possible to
|
||||||
|
create a match block with a zero-length ovector; there is always one pair of
|
||||||
|
offsets.)
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1175,10 +1209,9 @@ characters.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC18" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcre2api</b>(3),
|
<b>pcre2</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
|
||||||
<b>pcre2callout</b>(3),
|
|
||||||
<b>pcre2jit</b>(3), <b>pcre2matching</b>(3), <b>pcre2partial</b>(3),
|
<b>pcre2jit</b>(3), <b>pcre2matching</b>(3), <b>pcre2partial</b>(3),
|
||||||
<b>pcre2pattern</b>(3), <b>pcre2precompile</b>(3).
|
<b>pcre2pattern</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC19" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC19" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1191,7 +1224,7 @@ Cambridge CB2 3QH, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 11 October 2014
|
Last updated: 02 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -140,12 +140,12 @@ in the library.
|
||||||
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
||||||
<td> Fast path interface to JIT matching</td></tr>
|
<td> Fast path interface to JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
|
|
||||||
<td> Create a stack for JIT matching</td></tr>
|
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
||||||
<td> Assign stack for JIT matching</td></tr>
|
<td> Assign stack for JIT matching</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
|
||||||
|
<td> Create a stack for JIT matching</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
||||||
<td> Free a JIT matching stack</td></tr>
|
<td> Free a JIT matching stack</td></tr>
|
||||||
|
|
||||||
|
|
347
doc/pcre2.txt
347
doc/pcre2.txt
|
@ -324,7 +324,7 @@ PCRE2 NATIVE API JIT FUNCTIONS
|
||||||
|
|
||||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||||
|
|
||||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
void pcre2_jit_stack_assign(const pcre2_code *code,
|
||||||
|
@ -437,7 +437,7 @@ PCRE2 API OVERVIEW
|
||||||
support is not available.
|
support is not available.
|
||||||
|
|
||||||
More complicated programs might need to make use of the specialist
|
More complicated programs might need to make use of the specialist
|
||||||
functions pcre2_jit_stack_alloc(), pcre2_jit_stack_free(), and
|
functions pcre2_jit_stack_create(), pcre2_jit_stack_free(), and
|
||||||
pcre2_jit_stack_assign() in order to control the JIT code's memory
|
pcre2_jit_stack_assign() in order to control the JIT code's memory
|
||||||
usage.
|
usage.
|
||||||
|
|
||||||
|
@ -1303,7 +1303,7 @@ JUST-IN-TIME (JIT) COMPILATION
|
||||||
|
|
||||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||||
|
|
||||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
void pcre2_jit_stack_assign(const pcre2_code *code,
|
||||||
|
@ -2034,10 +2034,12 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
|
||||||
after a failed match or a partial match, as well as after a successful
|
after a failed match or a partial match, as well as after a successful
|
||||||
one.
|
one.
|
||||||
|
|
||||||
The offset of the character at which the successful match started is
|
The code unit offset of the character at which a successful match
|
||||||
returned by pcre2_get_startchar(). This can be different to the value
|
started is returned by pcre2_get_startchar(). For a non-partial match,
|
||||||
of ovector[0] if the pattern contains the \K escape sequence. Note,
|
this can be different to the value of ovector[0] if the pattern con-
|
||||||
however, that \K has no effect for a partial match.
|
tains the \K escape sequence. After a partial match, however, this
|
||||||
|
value is always the same as ovector[0] because \K does not affect the
|
||||||
|
result of a partial match.
|
||||||
|
|
||||||
Error return values from pcre2_match()
|
Error return values from pcre2_match()
|
||||||
|
|
||||||
|
@ -2266,23 +2268,24 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
|
||||||
be unique (PCRE2_DUPNAMES was not set), you can find the number from
|
be unique (PCRE2_DUPNAMES was not set), you can find the number from
|
||||||
the name by calling pcre2_substring_number_from_name(). The first argu-
|
the name by calling pcre2_substring_number_from_name(). The first argu-
|
||||||
ment is the compiled pattern, and the second is the name. The yield of
|
ment is the compiled pattern, and the second is the name. The yield of
|
||||||
the function is the subpattern number, or PCRE2_ERROR_NOSUBSTRING if
|
the function is the subpattern number, PCRE2_ERROR_NOSUBSTRING if there
|
||||||
there is no subpattern of that name.
|
is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if
|
||||||
|
there is more than one subpattern of that name.
|
||||||
|
|
||||||
Given the number, you can extract the substring directly, or use one of
|
Given the number, you can extract the substring directly, or use one of
|
||||||
the functions described in the previous section. For convenience, there
|
the functions described in the previous section. For convenience, there
|
||||||
are also "byname" functions that correspond to the "bynumber" func-
|
are also "byname" functions that correspond to the "bynumber" func-
|
||||||
tions, the only difference being that the second argument is a name
|
tions, the only difference being that the second argument is a name
|
||||||
instead of a number. However, if PCRE2_DUPNAMES is set and there are
|
instead of a number. However, if PCRE2_DUPNAMES is set and there are
|
||||||
duplicate names, the behaviour may not be what you want (see the next
|
duplicate names, the behaviour may not be what you want (see the next
|
||||||
section).
|
section).
|
||||||
|
|
||||||
Warning: If the pattern uses the (?| feature to set up multiple subpat-
|
Warning: If the pattern uses the (?| feature to set up multiple subpat-
|
||||||
terns with the same number, as described in the section on duplicate
|
terns with the same number, as described in the section on duplicate
|
||||||
subpattern numbers in the pcre2pattern page, you cannot use names to
|
subpattern numbers in the pcre2pattern page, you cannot use names to
|
||||||
distinguish the different subpatterns, because names are not included
|
distinguish the different subpatterns, because names are not included
|
||||||
in the compiled code. The matching process uses only numbers. For this
|
in the compiled code. The matching process uses only numbers. For this
|
||||||
reason, the use of different names for subpatterns of the same number
|
reason, the use of different names for subpatterns of the same number
|
||||||
causes an error at compile time.
|
causes an error at compile time.
|
||||||
|
|
||||||
|
|
||||||
|
@ -2291,54 +2294,54 @@ DUPLICATE SUBPATTERN NAMES
|
||||||
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
||||||
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
||||||
|
|
||||||
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
||||||
subpatterns are not required to be unique. Duplicate names are always
|
subpatterns are not required to be unique. Duplicate names are always
|
||||||
allowed for subpatterns with the same number, created by using the (?|
|
allowed for subpatterns with the same number, created by using the (?|
|
||||||
feature. Indeed, if such subpatterns are named, they are required to
|
feature. Indeed, if such subpatterns are named, they are required to
|
||||||
use the same names.
|
use the same names.
|
||||||
|
|
||||||
Normally, patterns with duplicate names are such that in any one match,
|
Normally, patterns with duplicate names are such that in any one match,
|
||||||
only one of the named subpatterns participates. An example is shown in
|
only one of the named subpatterns participates. An example is shown in
|
||||||
the pcre2pattern documentation.
|
the pcre2pattern documentation.
|
||||||
|
|
||||||
When duplicates are present, pcre2_substring_copy_byname() and
|
When duplicates are present, pcre2_substring_copy_byname() and
|
||||||
pcre2_substring_get_byname() return the first substring corresponding
|
pcre2_substring_get_byname() return the first substring corresponding
|
||||||
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
||||||
is returned. The pcre2_substring_number_from_name() function returns
|
is returned. The pcre2_substring_number_from_name() function returns
|
||||||
one of the numbers that are associated with the name, but it is not
|
one of the numbers that are associated with the name, but it is not
|
||||||
defined which it is.
|
defined which it is.
|
||||||
|
|
||||||
If you want to get full details of all captured substrings for a given
|
If you want to get full details of all captured substrings for a given
|
||||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||||
first argument is the compiled pattern, and the second is the name. If
|
first argument is the compiled pattern, and the second is the name. If
|
||||||
the third and fourth arguments are NULL, the function returns a group
|
the third and fourth arguments are NULL, the function returns a group
|
||||||
number (it is not defined which). Otherwise, the third and fourth argu-
|
number (it is not defined which). Otherwise, the third and fourth argu-
|
||||||
ments must be pointers to variables that are updated by the function.
|
ments must be pointers to variables that are updated by the function.
|
||||||
After it has run, they point to the first and last entries in the name-
|
After it has run, they point to the first and last entries in the name-
|
||||||
to-number table for the given name, and the function returns the length
|
to-number table for the given name, and the function returns the length
|
||||||
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
||||||
there are no entries for the given name.
|
there are no entries for the given name.
|
||||||
|
|
||||||
The format of the name table is described above in the section entitled
|
The format of the name table is described above in the section entitled
|
||||||
Information about a pattern above. Given all the relevant entries for
|
Information about a pattern above. Given all the relevant entries for
|
||||||
the name, you can extract each of their numbers, and hence the captured
|
the name, you can extract each of their numbers, and hence the captured
|
||||||
data.
|
data.
|
||||||
|
|
||||||
|
|
||||||
FINDING ALL POSSIBLE MATCHES
|
FINDING ALL POSSIBLE MATCHES
|
||||||
|
|
||||||
The traditional matching function uses a similar algorithm to Perl,
|
The traditional matching function uses a similar algorithm to Perl,
|
||||||
which stops when it finds the first match, starting at a given point in
|
which stops when it finds the first match, starting at a given point in
|
||||||
the subject. If you want to find all possible matches, or the longest
|
the subject. If you want to find all possible matches, or the longest
|
||||||
possible match at a given position, consider using the alternative
|
possible match at a given position, consider using the alternative
|
||||||
matching function (see below) instead. If you cannot use the alterna-
|
matching function (see below) instead. If you cannot use the alterna-
|
||||||
tive function, you can kludge it up by making use of the callout facil-
|
tive function, you can kludge it up by making use of the callout facil-
|
||||||
ity, which is described in the pcre2callout documentation.
|
ity, which is described in the pcre2callout documentation.
|
||||||
|
|
||||||
What you have to do is to insert a callout right at the end of the pat-
|
What you have to do is to insert a callout right at the end of the pat-
|
||||||
tern. When your callout function is called, extract and save the cur-
|
tern. When your callout function is called, extract and save the cur-
|
||||||
rent matched substring. Then return 1, which forces pcre2_match() to
|
rent matched substring. Then return 1, which forces pcre2_match() to
|
||||||
backtrack and try other alternatives. Ultimately, when it runs out of
|
backtrack and try other alternatives. Ultimately, when it runs out of
|
||||||
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
||||||
|
|
||||||
|
|
||||||
|
@ -2350,26 +2353,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
pcre2_match_context *mcontext,
|
pcre2_match_context *mcontext,
|
||||||
int *workspace, PCRE2_SIZE wscount);
|
int *workspace, PCRE2_SIZE wscount);
|
||||||
|
|
||||||
The function pcre2_dfa_match() is called to match a subject string
|
The function pcre2_dfa_match() is called to match a subject string
|
||||||
against a compiled pattern, using a matching algorithm that scans the
|
against a compiled pattern, using a matching algorithm that scans the
|
||||||
subject string just once, and does not backtrack. This has different
|
subject string just once, and does not backtrack. This has different
|
||||||
characteristics to the normal algorithm, and is not compatible with
|
characteristics to the normal algorithm, and is not compatible with
|
||||||
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
||||||
theless, there are times when this kind of matching can be useful. For
|
theless, there are times when this kind of matching can be useful. For
|
||||||
a discussion of the two matching algorithms, and a list of features
|
a discussion of the two matching algorithms, and a list of features
|
||||||
that pcre2_dfa_match() does not support, see the pcre2matching documen-
|
that pcre2_dfa_match() does not support, see the pcre2matching documen-
|
||||||
tation.
|
tation.
|
||||||
|
|
||||||
The arguments for the pcre2_dfa_match() function are the same as for
|
The arguments for the pcre2_dfa_match() function are the same as for
|
||||||
pcre2_match(), plus two extras. The ovector within the match data block
|
pcre2_match(), plus two extras. The ovector within the match data block
|
||||||
is used in a different way, and this is described below. The other com-
|
is used in a different way, and this is described below. The other com-
|
||||||
mon arguments are used in the same way as for pcre2_match(), so their
|
mon arguments are used in the same way as for pcre2_match(), so their
|
||||||
description is not repeated here.
|
description is not repeated here.
|
||||||
|
|
||||||
The two additional arguments provide workspace for the function. The
|
The two additional arguments provide workspace for the function. The
|
||||||
workspace vector should contain at least 20 elements. It is used for
|
workspace vector should contain at least 20 elements. It is used for
|
||||||
keeping track of multiple paths through the pattern tree. More
|
keeping track of multiple paths through the pattern tree. More
|
||||||
workspace is needed for patterns and subjects where there are a lot of
|
workspace is needed for patterns and subjects where there are a lot of
|
||||||
potential matches.
|
potential matches.
|
||||||
|
|
||||||
Here is an example of a simple call to pcre2_dfa_match():
|
Here is an example of a simple call to pcre2_dfa_match():
|
||||||
|
@ -2389,45 +2392,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
Option bits for pcre2_dfa_match()
|
Option bits for pcre2_dfa_match()
|
||||||
|
|
||||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
||||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
||||||
these are exactly the same as for pcre2_match(), so their description
|
these are exactly the same as for pcre2_match(), so their description
|
||||||
is not repeated here.
|
is not repeated here.
|
||||||
|
|
||||||
PCRE2_PARTIAL_HARD
|
PCRE2_PARTIAL_HARD
|
||||||
PCRE2_PARTIAL_SOFT
|
PCRE2_PARTIAL_SOFT
|
||||||
|
|
||||||
These have the same general effect as they do for pcre2_match(), but
|
These have the same general effect as they do for pcre2_match(), but
|
||||||
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
||||||
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
||||||
subject is reached and there is still at least one matching possibility
|
subject is reached and there is still at least one matching possibility
|
||||||
that requires additional characters. This happens even if some complete
|
that requires additional characters. This happens even if some complete
|
||||||
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
||||||
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
||||||
if the end of the subject is reached, there have been no complete
|
if the end of the subject is reached, there have been no complete
|
||||||
matches, but there is still at least one matching possibility. The por-
|
matches, but there is still at least one matching possibility. The por-
|
||||||
tion of the string that was inspected when the longest partial match
|
tion of the string that was inspected when the longest partial match
|
||||||
was found is set as the first matching string in both cases. There is a
|
was found is set as the first matching string in both cases. There is a
|
||||||
more detailed discussion of partial and multi-segment matching, with
|
more detailed discussion of partial and multi-segment matching, with
|
||||||
examples, in the pcre2partial documentation.
|
examples, in the pcre2partial documentation.
|
||||||
|
|
||||||
PCRE2_DFA_SHORTEST
|
PCRE2_DFA_SHORTEST
|
||||||
|
|
||||||
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
||||||
stop as soon as it has found one match. Because of the way the alterna-
|
stop as soon as it has found one match. Because of the way the alterna-
|
||||||
tive algorithm works, this is necessarily the shortest possible match
|
tive algorithm works, this is necessarily the shortest possible match
|
||||||
at the first possible matching point in the subject string.
|
at the first possible matching point in the subject string.
|
||||||
|
|
||||||
PCRE2_DFA_RESTART
|
PCRE2_DFA_RESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() returns a partial match, it is possible to call
|
When pcre2_dfa_match() returns a partial match, it is possible to call
|
||||||
it again, with additional subject characters, and have it continue with
|
it again, with additional subject characters, and have it continue with
|
||||||
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
||||||
it is set, the workspace and wscount options must reference the same
|
it is set, the workspace and wscount options must reference the same
|
||||||
vector as before because data about the match so far is left in them
|
vector as before because data about the match so far is left in them
|
||||||
after a partial match. There is more discussion of this facility in the
|
after a partial match. There is more discussion of this facility in the
|
||||||
pcre2partial documentation.
|
pcre2partial documentation.
|
||||||
|
|
||||||
|
@ -2435,8 +2438,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
|
|
||||||
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
||||||
string in the subject. Note, however, that all the matches from one run
|
string in the subject. Note, however, that all the matches from one run
|
||||||
of the function start at the same point in the subject. The shorter
|
of the function start at the same point in the subject. The shorter
|
||||||
matches are all initial substrings of the longer matches. For example,
|
matches are all initial substrings of the longer matches. For example,
|
||||||
if the pattern
|
if the pattern
|
||||||
|
|
||||||
<.*>
|
<.*>
|
||||||
|
@ -2451,66 +2454,66 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
||||||
<something> <something else>
|
<something> <something else>
|
||||||
<something> <something else> <something further>
|
<something> <something else> <something further>
|
||||||
|
|
||||||
On success, the yield of the function is a number greater than zero,
|
On success, the yield of the function is a number greater than zero,
|
||||||
which is the number of matched substrings. The offsets of the sub-
|
which is the number of matched substrings. The offsets of the sub-
|
||||||
strings are returned in the ovector, and can be extracted in the same
|
strings are returned in the ovector, and can be extracted in the same
|
||||||
way as for pcre2_match(). They are returned in reverse order of
|
way as for pcre2_match(). They are returned in reverse order of
|
||||||
length; that is, the longest matching string is given first. If there
|
length; that is, the longest matching string is given first. If there
|
||||||
were too many matches to fit into the ovector, the yield of the func-
|
were too many matches to fit into the ovector, the yield of the func-
|
||||||
tion is zero, and the vector is filled with the longest matches.
|
tion is zero, and the vector is filled with the longest matches.
|
||||||
|
|
||||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||||
character repeats at the end of a pattern (as well as internally). For
|
character repeats at the end of a pattern (as well as internally). For
|
||||||
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
||||||
there is no point in backtracking into the repeated digits. For DFA
|
there is no point in backtracking into the repeated digits. For DFA
|
||||||
matching, this means that only one possible match is found. If you
|
matching, this means that only one possible match is found. If you
|
||||||
really do want multiple matches in such cases, either use an ungreedy
|
really do want multiple matches in such cases, either use an ungreedy
|
||||||
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
||||||
ing.
|
ing.
|
||||||
|
|
||||||
Error returns from pcre2_dfa_match()
|
Error returns from pcre2_dfa_match()
|
||||||
|
|
||||||
The pcre2_dfa_match() function returns a negative number when it fails.
|
The pcre2_dfa_match() function returns a negative number when it fails.
|
||||||
Many of the errors are the same as for pcre2_match(), as described
|
Many of the errors are the same as for pcre2_match(), as described
|
||||||
above. There are in addition the following errors that are specific to
|
above. There are in addition the following errors that are specific to
|
||||||
pcre2_dfa_match():
|
pcre2_dfa_match():
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UITEM
|
PCRE2_ERROR_DFA_UITEM
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters an item in the
|
This return is given if pcre2_dfa_match() encounters an item in the
|
||||||
pattern that it does not support, for instance, the use of \C or a back
|
pattern that it does not support, for instance, the use of \C or a back
|
||||||
reference.
|
reference.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_UCOND
|
PCRE2_ERROR_DFA_UCOND
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() encounters a condition item
|
This return is given if pcre2_dfa_match() encounters a condition item
|
||||||
that uses a back reference for the condition, or a test for recursion
|
that uses a back reference for the condition, or a test for recursion
|
||||||
in a specific group. These are not supported.
|
in a specific group. These are not supported.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_WSSIZE
|
PCRE2_ERROR_DFA_WSSIZE
|
||||||
|
|
||||||
This return is given if pcre2_dfa_match() runs out of space in the
|
This return is given if pcre2_dfa_match() runs out of space in the
|
||||||
workspace vector.
|
workspace vector.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_RECURSE
|
PCRE2_ERROR_DFA_RECURSE
|
||||||
|
|
||||||
When a recursive subpattern is processed, the matching function calls
|
When a recursive subpattern is processed, the matching function calls
|
||||||
itself recursively, using private memory for the ovector and workspace.
|
itself recursively, using private memory for the ovector and workspace.
|
||||||
This error is given if the internal ovector is not large enough. This
|
This error is given if the internal ovector is not large enough. This
|
||||||
should be extremely rare, as a vector of size 1000 is used.
|
should be extremely rare, as a vector of size 1000 is used.
|
||||||
|
|
||||||
PCRE2_ERROR_DFA_BADRESTART
|
PCRE2_ERROR_DFA_BADRESTART
|
||||||
|
|
||||||
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
||||||
some plausibility checks are made on the contents of the workspace,
|
some plausibility checks are made on the contents of the workspace,
|
||||||
which should contain data about the previous partial match. If any of
|
which should contain data about the previous partial match. If any of
|
||||||
these checks fail, this error is given.
|
these checks fail, this error is given.
|
||||||
|
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
|
|
||||||
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
||||||
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
||||||
pcre2stack(3).
|
pcre2stack(3).
|
||||||
|
|
||||||
|
|
||||||
|
@ -2523,7 +2526,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 16 October 2014
|
Last updated: 03 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3411,8 +3414,6 @@ NAME
|
||||||
|
|
||||||
PCRE2 JUST-IN-TIME COMPILER SUPPORT
|
PCRE2 JUST-IN-TIME COMPILER SUPPORT
|
||||||
|
|
||||||
FIXME: This needs checking over once JIT support is implemented.
|
|
||||||
|
|
||||||
Just-in-time compiling is a heavyweight optimization that can greatly
|
Just-in-time compiling is a heavyweight optimization that can greatly
|
||||||
speed up pattern matching. However, it comes at the cost of extra pro-
|
speed up pattern matching. However, it comes at the cost of extra pro-
|
||||||
cessing before the match is performed. Therefore, it is of most benefit
|
cessing before the match is performed. Therefore, it is of most benefit
|
||||||
|
@ -3462,100 +3463,97 @@ SIMPLE USE OF JIT
|
||||||
second is a set of option bits, which must include at least one of
|
second is a set of option bits, which must include at least one of
|
||||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||||
|
|
||||||
The returned value from pcre2_jit_compile() is zero on success, or a
|
If JIT support is not available, a call to pcre2_jit_compile() does
|
||||||
negative error code. In particular, PCRE2_ERROR_JIT_BADOPTION is
|
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled
|
||||||
returned if JIT is not supported or if an unknown options bit is set.
|
pattern is passed to the JIT compiler, which turns it into machine code
|
||||||
|
that executes much faster than the normal interpretive code, but yields
|
||||||
|
exactly the same results. The returned value from pcre2_jit_compile()
|
||||||
|
is zero on success, or a negative error code.
|
||||||
|
|
||||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com-
|
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com-
|
||||||
plete matches. If you want to run partial matches using the PCRE2_PAR-
|
plete matches. If you want to run partial matches using the PCRE2_PAR-
|
||||||
TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should
|
TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should
|
||||||
set one or both of the other options as well as, or instead of
|
set one or both of the other options as well as, or instead of
|
||||||
PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code
|
PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code
|
||||||
for each of the three modes (normal, soft partial, hard partial). When
|
for each of the three modes (normal, soft partial, hard partial). When
|
||||||
pcre2_match() is called, the appropriate code is run if it is avail-
|
pcre2_match() is called, the appropriate code is run if it is avail-
|
||||||
able. Otherwise, the pattern is matched using interpretive code.
|
able. Otherwise, the pattern is matched using interpretive code.
|
||||||
|
|
||||||
In some circumstances you may need to call additional functions. These
|
In some circumstances you may need to call additional functions. These
|
||||||
are described in the section entitled "Controlling the JIT stack"
|
are described in the section entitled "Controlling the JIT stack"
|
||||||
below.
|
below.
|
||||||
|
|
||||||
If JIT support is not available, a call to pcre2_jit_comple() does
|
|
||||||
nothing and returns FIXME. Otherwise, the compiled pattern is passed to
|
|
||||||
the JIT compiler, which turns it into machine code that executes much
|
|
||||||
faster than the normal interpretive code, but yields exactly the same
|
|
||||||
results.
|
|
||||||
|
|
||||||
There are some pcre2_match() options that are not supported by JIT, and
|
There are some pcre2_match() options that are not supported by JIT, and
|
||||||
there are also some pattern items that JIT cannot handle. Details are
|
there are also some pattern items that JIT cannot handle. Details are
|
||||||
given below. In both cases, matching automatically falls back to the
|
given below. In both cases, matching automatically falls back to the
|
||||||
interpretive code. If you want to know whether JIT was actually used
|
interpretive code. If you want to know whether JIT was actually used
|
||||||
for a particular match, you should arrange for a JIT callback function
|
for a particular match, you should arrange for a JIT callback function
|
||||||
to be set up as described in the section entitled "Controlling the JIT
|
to be set up as described in the section entitled "Controlling the JIT
|
||||||
stack" below, even if you do not need to supply a non-default JIT
|
stack" below, even if you do not need to supply a non-default JIT
|
||||||
stack. Such a callback function is called whenever JIT code is about to
|
stack. Such a callback function is called whenever JIT code is about to
|
||||||
be obeyed. If the match-time options are not right for JIT execution,
|
be obeyed. If the match-time options are not right for JIT execution,
|
||||||
the callback function is not obeyed.
|
the callback function is not obeyed.
|
||||||
|
|
||||||
If the JIT compiler finds an unsupported item, no JIT data is gener-
|
If the JIT compiler finds an unsupported item, no JIT data is gener-
|
||||||
ated. You can find out if JIT matching is available after compiling a
|
ated. You can find out if JIT matching is available after compiling a
|
||||||
pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JIT option.
|
pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JIT option.
|
||||||
A result of 1 means that JIT compilation was successful. A result of 0
|
A result of 1 means that JIT compilation was successful. A result of 0
|
||||||
means that JIT support is not available, or the pattern was not pro-
|
means that JIT support is not available, or the pattern was not pro-
|
||||||
cessed by pcre2_jit_compile(), or the JIT compiler was not able to han-
|
cessed by pcre2_jit_compile(), or the JIT compiler was not able to han-
|
||||||
dle the pattern.
|
dle the pattern.
|
||||||
|
|
||||||
|
|
||||||
UNSUPPORTED OPTIONS AND PATTERN ITEMS
|
UNSUPPORTED OPTIONS AND PATTERN ITEMS
|
||||||
|
|
||||||
The pcre2_match() options that are supported for JIT matching are
|
The pcre2_match() options that are supported for JIT matching are
|
||||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
|
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
|
||||||
PCRE2_ANCHORED option is not supported at match time.
|
PCRE2_ANCHORED option is not supported at match time.
|
||||||
|
|
||||||
The only unsupported pattern items are \C (match a single data unit)
|
The only unsupported pattern items are \C (match a single data unit)
|
||||||
when running in a UTF mode, and a callout immediately before an asser-
|
when running in a UTF mode, and a callout immediately before an asser-
|
||||||
tion condition in a conditional group.
|
tion condition in a conditional group.
|
||||||
|
|
||||||
|
|
||||||
RETURN VALUES FROM JIT MATCHING
|
RETURN VALUES FROM JIT MATCHING
|
||||||
|
|
||||||
When a pattern is matched using JIT matching, the return values are the
|
When a pattern is matched using JIT matching, the return values are the
|
||||||
same as those given by the interpretive pcre2_match() code, with the
|
same as those given by the interpretive pcre2_match() code, with the
|
||||||
addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
|
addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
|
||||||
that the memory used for the JIT stack was insufficient. See "Control-
|
that the memory used for the JIT stack was insufficient. See "Control-
|
||||||
ling the JIT stack" below for a discussion of JIT stack usage.
|
ling the JIT stack" below for a discussion of JIT stack usage.
|
||||||
|
|
||||||
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
|
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
|
||||||
searching a very large pattern tree goes on for too long, as it is in
|
searching a very large pattern tree goes on for too long, as it is in
|
||||||
the same circumstance when JIT is not used, but the details of exactly
|
the same circumstance when JIT is not used, but the details of exactly
|
||||||
what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error
|
what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error
|
||||||
code is never returned when JIT matching is used.
|
code is never returned when JIT matching is used.
|
||||||
|
|
||||||
|
|
||||||
CONTROLLING THE JIT STACK
|
CONTROLLING THE JIT STACK
|
||||||
|
|
||||||
When the compiled JIT code runs, it needs a block of memory to use as a
|
When the compiled JIT code runs, it needs a block of memory to use as a
|
||||||
stack. By default, it uses 32K on the machine stack. However, some
|
stack. By default, it uses 32K on the machine stack. However, some
|
||||||
large or complicated patterns need more than this. The error
|
large or complicated patterns need more than this. The error
|
||||||
PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
|
PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
|
||||||
Three functions are provided for managing blocks of memory for use as
|
Three functions are provided for managing blocks of memory for use as
|
||||||
JIT stacks. There is further discussion about the use of JIT stacks in
|
JIT stacks. There is further discussion about the use of JIT stacks in
|
||||||
the section entitled "JIT stack FAQ" below.
|
the section entitled "JIT stack FAQ" below.
|
||||||
|
|
||||||
The pcre2_jit_stack_alloc() function creates a JIT stack. Its arguments
|
The pcre2_jit_stack_create() function creates a JIT stack. Its argu-
|
||||||
are a general context (for memory allocation functions, or NULL for
|
ments are a general context (for memory allocation functions, or NULL
|
||||||
standard memory allocation), a starting size and a maximum size, and it
|
for standard memory allocation), a starting size and a maximum size,
|
||||||
returns a pointer to an opaque structure of type pcre2_jit_stack, or
|
and it returns a pointer to an opaque structure of type
|
||||||
NULL if there is an error. The pcre2_jit_stack_free() function is used
|
pcre2_jit_stack, or NULL if there is an error. The
|
||||||
to free a stack that is no longer needed. (For the technically minded:
|
pcre2_jit_stack_free() function is used to free a stack that is no
|
||||||
the address space is allocated by mmap or VirtualAlloc.) FIXME Is this
|
longer needed. (For the technically minded: the address space is allo-
|
||||||
right?
|
cated by mmap or VirtualAlloc.) FIXME Is this right?
|
||||||
|
|
||||||
JIT uses far less memory for recursion than the interpretive code, and
|
JIT uses far less memory for recursion than the interpretive code, and
|
||||||
a maximum stack size of 512K to 1M should be more than enough for any
|
a maximum stack size of 512K to 1M should be more than enough for any
|
||||||
pattern.
|
pattern.
|
||||||
|
|
||||||
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
||||||
should use. Its arguments are as follows:
|
should use. Its arguments are as follows:
|
||||||
|
|
||||||
pcre2_code *code
|
pcre2_code *code
|
||||||
|
@ -3563,21 +3561,22 @@ CONTROLLING THE JIT STACK
|
||||||
void *data
|
void *data
|
||||||
|
|
||||||
The code argument is a pointer to a compiled pattern, after it has been
|
The code argument is a pointer to a compiled pattern, after it has been
|
||||||
processed by pcre2_jit_compile(). There are three cases for the values
|
processed by pcre2_jit_compile(). There are three cases for the values
|
||||||
of the other two options:
|
of the other two options:
|
||||||
|
|
||||||
(1) If callback is NULL and data is NULL, an internal 32K block
|
(1) If callback is NULL and data is NULL, an internal 32K block
|
||||||
on the machine stack is used.
|
on the machine stack is used.
|
||||||
|
|
||||||
(2) If callback is NULL and data is not NULL, data must be
|
(2) If callback is NULL and data is not NULL, data must be
|
||||||
a valid JIT stack, the result of calling pcre2_jit_stack_alloc().
|
a valid JIT stack, the result of calling pcre2_jit_stack_cre-
|
||||||
|
ate().
|
||||||
|
|
||||||
(3) If callback is not NULL, it must point to a function that is
|
(3) If callback is not NULL, it must point to a function that is
|
||||||
called with data as an argument at the start of matching, in
|
called with data as an argument at the start of matching, in
|
||||||
order to set up a JIT stack. If the return from the callback
|
order to set up a JIT stack. If the return from the callback
|
||||||
function is NULL, the internal 32K stack is used; otherwise the
|
function is NULL, the internal 32K stack is used; otherwise the
|
||||||
return value must be a valid JIT stack, the result of calling
|
return value must be a valid JIT stack, the result of calling
|
||||||
pcre2_jit_stack_alloc().
|
pcre2_jit_stack_create().
|
||||||
|
|
||||||
A callback function is obeyed whenever JIT code is about to be run; it
|
A callback function is obeyed whenever JIT code is about to be run; it
|
||||||
is not obeyed when pcre2_match() is called with options that are incom-
|
is not obeyed when pcre2_match() is called with options that are incom-
|
||||||
|
@ -3605,7 +3604,7 @@ CONTROLLING THE JIT STACK
|
||||||
up non-default JIT stacks might operate:
|
up non-default JIT stacks might operate:
|
||||||
|
|
||||||
During thread initialization
|
During thread initialization
|
||||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
thread_local_var = pcre2_jit_stack_create(...)
|
||||||
|
|
||||||
During thread exit
|
During thread exit
|
||||||
pcre2_jit_stack_free(thread_local_var)
|
pcre2_jit_stack_free(thread_local_var)
|
||||||
|
@ -3687,6 +3686,19 @@ JIT STACK FAQ
|
||||||
throw out this complicated API.
|
throw out this complicated API.
|
||||||
|
|
||||||
|
|
||||||
|
FREEING JIT SPECULATIVE MEMORY
|
||||||
|
|
||||||
|
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||||
|
|
||||||
|
The JIT executable allocator does not free all memory when it is possi-
|
||||||
|
ble. It expects new allocations, and keeps some free memory around to
|
||||||
|
improve allocation speed. However, in low memory conditions, it might
|
||||||
|
be better to free all possible memory. You can cause this to happen by
|
||||||
|
calling pcre2_jit_free_unused_memory(). Its argument is a general con-
|
||||||
|
text, for custom memory management, or NULL for standard memory manage-
|
||||||
|
ment.
|
||||||
|
|
||||||
|
|
||||||
EXAMPLE CODE
|
EXAMPLE CODE
|
||||||
|
|
||||||
This is a single-threaded example that specifies a JIT stack without
|
This is a single-threaded example that specifies a JIT stack without
|
||||||
|
@ -3702,7 +3714,7 @@ EXAMPLE CODE
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
/* Check for error (NULL) */
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
|
@ -3727,19 +3739,20 @@ JIT FAST PATH API
|
||||||
exactly the same arguments as pcre2_match(), plus one additional argu-
|
exactly the same arguments as pcre2_match(), plus one additional argu-
|
||||||
ment that must either point to a JIT stack or be NULL. In the latter
|
ment that must either point to a JIT stack or be NULL. In the latter
|
||||||
case, if a callback function has been set up by
|
case, if a callback function has been set up by
|
||||||
pcre2_jit_stack_alloc(), it is called. Otherwise the system stack is
|
pcre2_jit_stack_assign(), it is called. Otherwise the system stack is
|
||||||
used. The return values are the same as for pcre2_match(), plus
|
used. The return values are the same as for pcre2_match(), plus
|
||||||
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
||||||
requested that was not compiled. Unsupported option bits are ignored.
|
requested that was not compiled. Unsupported option bits (for example,
|
||||||
|
PCRE2_ANCHORED) are ignored.
|
||||||
|
|
||||||
When you call pcre2_match(), as well as testing for invalid options, a
|
When you call pcre2_match(), as well as testing for invalid options, a
|
||||||
number of other sanity checks are performed on the arguments. For exam-
|
number of other sanity checks are performed on the arguments. For exam-
|
||||||
ple, if the subject pointer is NULL, an immediate error is given. Also,
|
ple, if the subject pointer is NULL, an immediate error is given. Also,
|
||||||
unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
|
unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
|
||||||
validity. In the interests of speed, these checks do not happen on the
|
validity. In the interests of speed, these checks do not happen on the
|
||||||
JIT fast path, and if invalid data is passed, the result is undefined.
|
JIT fast path, and if invalid data is passed, the result is undefined.
|
||||||
|
|
||||||
Bypassing the sanity checks and the pcre2_match() wrapping can give
|
Bypassing the sanity checks and the pcre2_match() wrapping can give
|
||||||
speedups of more than 10%.
|
speedups of more than 10%.
|
||||||
|
|
||||||
|
|
||||||
|
@ -3757,7 +3770,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 21 October 2014
|
Last updated: 03 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -15,9 +15,9 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
This function frees unused JIT executable memory. The argument is a general
|
This function frees unused JIT executable memory. The argument is a general
|
||||||
context, for custom memory management, or NULL for standard memory management.
|
context, for custom memory management, or NULL for standard memory management.
|
||||||
JIT memory allocation retains some memory in order to improve future JIT
|
JIT memory allocation retains some memory in order to improve future JIT
|
||||||
compilation speed. In low memory conditions,
|
compilation speed. In low memory conditions,
|
||||||
\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
|
\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
|
||||||
freed.
|
freed.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_JIT_MATCH 3 "21 October 2014" "PCRE2 10.0"
|
.TH PCRE2_JIT_MATCH 3 "03 November 2014" "PCRE2 10.0"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -26,7 +26,7 @@ Its arguments are exactly the same as for
|
||||||
.\"
|
.\"
|
||||||
plus one additional argument that must either point to a JIT stack or be NULL.
|
plus one additional argument that must either point to a JIT stack or be NULL.
|
||||||
In the latter case, if a callback function has been set up by
|
In the latter case, if a callback function has been set up by
|
||||||
\fBpcre2_jit_stack_alloc()\fP, it is called. Otherwise the system stack is
|
\fBpcre2_jit_stack_assign()\fP, it is called. Otherwise the system stack is
|
||||||
used.
|
used.
|
||||||
.P
|
.P
|
||||||
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_JIT_STACK_ASSIGN 3 "21 October 2014" "PCRE2 10.0"
|
.TH PCRE2_JIT_STACK_ASSIGN 3 "03 November 2014" "PCRE2 10.0"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -27,12 +27,12 @@ block on the machine stack is used.
|
||||||
.P
|
.P
|
||||||
If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL,
|
If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL,
|
||||||
\fIcallback_data\fP must be a valid JIT stack, the result of calling
|
\fIcallback_data\fP must be a valid JIT stack, the result of calling
|
||||||
\fBpcre2_jit_stack_alloc()\fP.
|
\fBpcre2_jit_stack_create()\fP.
|
||||||
.P
|
.P
|
||||||
If \fIcallback\fP is not NULL, it is called with \fIcallback_data\fP as an
|
If \fIcallback\fP is not NULL, it is called with \fIcallback_data\fP as an
|
||||||
argument at the start of matching, in order to set up a JIT stack. If the
|
argument at the start of matching, in order to set up a JIT stack. If the
|
||||||
result is NULL, the internal 32K stack is used; otherwise the return value must
|
result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||||
be a valid JIT stack, the result of calling \fBpcre2_jit_stack_alloc()\fP.
|
be a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP.
|
||||||
.P
|
.P
|
||||||
You may safely assign the same JIT stack to multiple patterns, as long as they
|
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||||
are all matched in the same thread. In a multithread application, each thread
|
are all matched in the same thread. In a multithread application, each thread
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_JIT_STACK_ALLOC 3 "21 October 2014" "PCRE2 10.00"
|
.TH PCRE2_JIT_STACK_CREATE 3 "03 November 2014" "PCRE2 10.00"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.B #include <pcre2.h>
|
.B #include <pcre2.h>
|
||||||
.PP
|
.PP
|
||||||
.nf
|
.nf
|
||||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||||
.fi
|
.fi
|
||||||
.
|
.
|
|
@ -13,7 +13,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This function is used to free a JIT stack that was created by
|
This function is used to free a JIT stack that was created by
|
||||||
\fBpcre2_jit_stack_alloc()\fP when it is no longer needed. For more details,
|
\fBpcre2_jit_stack_create()\fP when it is no longer needed. For more details,
|
||||||
see the
|
see the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2jit\fP
|
\fBpcre2jit\fP
|
||||||
|
|
|
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This convenience function finds the number of a named substring capturing
|
This convenience function finds the number of a named substring capturing
|
||||||
parenthesis in a compiled pattern, provided that it is a unique name. The
|
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||||
function arguments are:
|
function arguments are:
|
||||||
.sp
|
.sp
|
||||||
\fIcode\fP Compiled regular expression
|
\fIcode\fP Compiled regular expression
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "29 October 2014" "PCRE2 10.00"
|
.TH PCRE2API 3 "03 November 2014" "PCRE2 10.00"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -182,7 +182,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.sp
|
.sp
|
||||||
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
||||||
.sp
|
.sp
|
||||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||||
.sp
|
.sp
|
||||||
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
||||||
|
@ -308,7 +308,7 @@ successfully compiled by \fBpcre2_compile()\fP. This does nothing if JIT
|
||||||
support is not available.
|
support is not available.
|
||||||
.P
|
.P
|
||||||
More complicated programs might need to make use of the specialist functions
|
More complicated programs might need to make use of the specialist functions
|
||||||
\fBpcre2_jit_stack_alloc()\fP, \fBpcre2_jit_stack_free()\fP, and
|
\fBpcre2_jit_stack_create()\fP, \fBpcre2_jit_stack_free()\fP, and
|
||||||
\fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage.
|
\fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage.
|
||||||
.P
|
.P
|
||||||
JIT matching is automatically used by \fBpcre2_match()\fP if it is available.
|
JIT matching is automatically used by \fBpcre2_match()\fP if it is available.
|
||||||
|
@ -1265,7 +1265,7 @@ textual error message from any error code.
|
||||||
.sp
|
.sp
|
||||||
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
||||||
.sp
|
.sp
|
||||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||||
.sp
|
.sp
|
||||||
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
||||||
|
@ -2072,7 +2072,7 @@ match or a partial match, as well as after a successful one.
|
||||||
The code unit offset of the character at which a successful match started is
|
The code unit offset of the character at which a successful match started is
|
||||||
returned by \fBpcre2_get_startchar()\fP. For a non-partial match, this can be
|
returned by \fBpcre2_get_startchar()\fP. For a non-partial match, this can be
|
||||||
different to the value of \fIovector[0]\fP if the pattern contains the \eK
|
different to the value of \fIovector[0]\fP if the pattern contains the \eK
|
||||||
escape sequence. After a partial match, however, this value is always the same
|
escape sequence. After a partial match, however, this value is always the same
|
||||||
as \fIovector[0]\fP because \eK does not affect the result of a partial match.
|
as \fIovector[0]\fP because \eK does not affect the result of a partial match.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -2333,7 +2333,7 @@ unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
|
||||||
calling \fBpcre2_substring_number_from_name()\fP. The first argument is the
|
calling \fBpcre2_substring_number_from_name()\fP. The first argument is the
|
||||||
compiled pattern, and the second is the name. The yield of the function is the
|
compiled pattern, and the second is the name. The yield of the function is the
|
||||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||||
that name.
|
that name.
|
||||||
.P
|
.P
|
||||||
Given the number, you can extract the substring directly, or use one of the
|
Given the number, you can extract the substring directly, or use one of the
|
||||||
|
@ -2631,6 +2631,6 @@ Cambridge CB2 3QH, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 29 October 2014
|
Last updated: 03 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2JIT 3 "02 November 2014" "PCRE2 10.00"
|
.TH PCRE2JIT 3 "03 November 2014" "PCRE2 10.00"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
|
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
|
||||||
|
@ -149,7 +149,7 @@ about the use of JIT stacks in the section entitled
|
||||||
.\"
|
.\"
|
||||||
below.
|
below.
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_jit_stack_alloc()\fP function creates a JIT stack. Its arguments
|
The \fBpcre2_jit_stack_create()\fP function creates a JIT stack. Its arguments
|
||||||
are a general context (for memory allocation functions, or NULL for standard
|
are a general context (for memory allocation functions, or NULL for standard
|
||||||
memory allocation), a starting size and a maximum size, and it returns a
|
memory allocation), a starting size and a maximum size, and it returns a
|
||||||
pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there
|
pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there
|
||||||
|
@ -176,14 +176,14 @@ the other two options:
|
||||||
on the machine stack is used.
|
on the machine stack is used.
|
||||||
.sp
|
.sp
|
||||||
(2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be
|
(2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be
|
||||||
a valid JIT stack, the result of calling \fBpcre2_jit_stack_alloc()\fP.
|
a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP.
|
||||||
.sp
|
.sp
|
||||||
(3) If \fIcallback\fP is not NULL, it must point to a function that is
|
(3) If \fIcallback\fP is not NULL, it must point to a function that is
|
||||||
called with \fIdata\fP as an argument at the start of matching, in
|
called with \fIdata\fP as an argument at the start of matching, in
|
||||||
order to set up a JIT stack. If the return from the callback
|
order to set up a JIT stack. If the return from the callback
|
||||||
function is NULL, the internal 32K stack is used; otherwise the
|
function is NULL, the internal 32K stack is used; otherwise the
|
||||||
return value must be a valid JIT stack, the result of calling
|
return value must be a valid JIT stack, the result of calling
|
||||||
\fBpcre2_jit_stack_alloc()\fP.
|
\fBpcre2_jit_stack_create()\fP.
|
||||||
.sp
|
.sp
|
||||||
A callback function is obeyed whenever JIT code is about to be run; it is not
|
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||||
obeyed when \fBpcre2_match()\fP is called with options that are incompatible
|
obeyed when \fBpcre2_match()\fP is called with options that are incompatible
|
||||||
|
@ -209,7 +209,7 @@ This is a suggestion for how a multithreaded program that needs to set up
|
||||||
non-default JIT stacks might operate:
|
non-default JIT stacks might operate:
|
||||||
.sp
|
.sp
|
||||||
During thread initialization
|
During thread initialization
|
||||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
thread_local_var = pcre2_jit_stack_create(...)
|
||||||
.sp
|
.sp
|
||||||
During thread exit
|
During thread exit
|
||||||
pcre2_jit_stack_free(thread_local_var)
|
pcre2_jit_stack_free(thread_local_var)
|
||||||
|
@ -323,7 +323,7 @@ callback.
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
/* Check for error (NULL) */
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
|
@ -352,7 +352,7 @@ must either point to a JIT stack or be NULL. In the latter case, if a callback
|
||||||
function has been set up by \fBpcre2_jit_stack_assign()\fP, it is called.
|
function has been set up by \fBpcre2_jit_stack_assign()\fP, it is called.
|
||||||
Otherwise the system stack is used. The return values are the same as for
|
Otherwise the system stack is used. The return values are the same as for
|
||||||
\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
||||||
or complete) is requested that was not compiled. Unsupported option bits (for
|
or complete) is requested that was not compiled. Unsupported option bits (for
|
||||||
example, PCRE2_ANCHORED) are ignored.
|
example, PCRE2_ANCHORED) are ignored.
|
||||||
.P
|
.P
|
||||||
When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
|
When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
|
||||||
|
@ -386,6 +386,6 @@ Cambridge CB2 3QH, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 02 November 2014
|
Last updated: 03 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -437,7 +437,7 @@ about the pattern:
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
hex pattern is coded in hexadecimal
|
hex pattern is coded in hexadecimal
|
||||||
jit[=<number>] use JIT
|
jit[=<number>] use JIT
|
||||||
jitfast use JIT fast path
|
jitfast use JIT fast path
|
||||||
jitverify verify JIT use
|
jitverify verify JIT use
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
|
@ -577,8 +577,8 @@ also output.
|
||||||
.sp
|
.sp
|
||||||
The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested
|
The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested
|
||||||
parentheses in a pattern. Breaching the limit causes a compilation error.
|
parentheses in a pattern. Breaching the limit causes a compilation error.
|
||||||
The default for the library is set when PCRE2 is built, but \fBpcre2test\fP
|
The default for the library is set when PCRE2 is built, but \fBpcre2test\fP
|
||||||
sets its own default of 220, which is required for running the standard test
|
sets its own default of 220, which is required for running the standard test
|
||||||
suite.
|
suite.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -651,7 +651,7 @@ not affect the compilation process.
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
/g global global matching
|
/g global global matching
|
||||||
mark show mark values
|
mark show mark values
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
.sp
|
.sp
|
||||||
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
||||||
defaults, set them in a \fB#subject\fP command.
|
defaults, set them in a \fB#subject\fP command.
|
||||||
|
@ -725,7 +725,7 @@ pattern.
|
||||||
offset=<n> set starting offset
|
offset=<n> set starting offset
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
.sp
|
.sp
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
@ -759,17 +759,17 @@ underneath them. Here is an example:
|
||||||
This shows that the matched string is "abc", with the preceding and following
|
This shows that the matched string is "abc", with the preceding and following
|
||||||
strings "pqr" and "xyz" also consulted during the match.
|
strings "pqr" and "xyz" also consulted during the match.
|
||||||
.P
|
.P
|
||||||
The \fBstartchar\fP modifier requests that the starting character for the match
|
The \fBstartchar\fP modifier requests that the starting character for the match
|
||||||
be indicated, if it is different to the start of the matched string. The only
|
be indicated, if it is different to the start of the matched string. The only
|
||||||
time when this occurs is when \eK has been processed as part of the match. In
|
time when this occurs is when \eK has been processed as part of the match. In
|
||||||
this situation, the output for the matched string is displayed from the
|
this situation, the output for the matched string is displayed from the
|
||||||
starting character instead of from the match point, with circumflex characters
|
starting character instead of from the match point, with circumflex characters
|
||||||
under the earlier characters. For example:
|
under the earlier characters. For example:
|
||||||
.sp
|
.sp
|
||||||
re> /abc\eKxyz/
|
re> /abc\eKxyz/
|
||||||
data> abcxyz\e=startchar
|
data> abcxyz\e=startchar
|
||||||
0: abcxyz
|
0: abcxyz
|
||||||
^^^
|
^^^
|
||||||
.sp
|
.sp
|
||||||
Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT.
|
Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT.
|
||||||
However, these two modifiers are mutually exclusive.
|
However, these two modifiers are mutually exclusive.
|
||||||
|
@ -856,7 +856,7 @@ The \fBjitstack\fP modifier provides a way of setting the maximum stack size
|
||||||
that is used by the just-in-time optimization code. It is ignored if JIT
|
that is used by the just-in-time optimization code. It is ignored if JIT
|
||||||
optimization is not being used. The value is a number of kilobytes. Providing a
|
optimization is not being used. The value is a number of kilobytes. Providing a
|
||||||
stack that is larger than the default 32K is necessary only for very
|
stack that is larger than the default 32K is necessary only for very
|
||||||
complicated patterns.
|
complicated patterns.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Setting match and recursion limits"
|
.SS "Setting match and recursion limits"
|
||||||
|
@ -925,9 +925,9 @@ available for storing matching information. The default is 15.
|
||||||
A value of zero is useful when testing the POSIX API because it causes
|
A value of zero is useful when testing the POSIX API because it causes
|
||||||
\fBregexec()\fP to be called with a NULL capture vector. When not testing the
|
\fBregexec()\fP to be called with a NULL capture vector. When not testing the
|
||||||
POSIX API, a value of zero is used to cause
|
POSIX API, a value of zero is used to cause
|
||||||
\fBpcre2_match_data_create_from_pattern\fP to be called, in order to create a
|
\fBpcre2_match_data_create_from_pattern\fP to be called, in order to create a
|
||||||
match block of exactly the right size for the pattern. (It is not possible to
|
match block of exactly the right size for the pattern. (It is not possible to
|
||||||
create a match block with a zero-length ovector; there is always one pair of
|
create a match block with a zero-length ovector; there is always one pair of
|
||||||
offsets.)
|
offsets.)
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -169,137 +169,138 @@ COMMAND LINE OPTIONS
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
|
||||||
If pcre2test is given two filename arguments, it reads from the first
|
If pcre2test is given two filename arguments, it reads from the first
|
||||||
and writes to the second. If it is given only one filename argument, it
|
and writes to the second. If the first name is "-", input is taken from
|
||||||
reads from that file and writes to stdout. Otherwise, it reads from
|
the standard input. If pcre2test is given only one argument, it reads
|
||||||
stdin and writes to stdout, and prompts for each line of input, using
|
from that file and writes to stdout. Otherwise, it reads from stdin and
|
||||||
"re>" to prompt for regular expression patterns, and "data>" to prompt
|
writes to stdout. When the input is a terminal, it prompts for each
|
||||||
for subject lines.
|
line of input, using "re>" to prompt for regular expression patterns,
|
||||||
|
and "data>" to prompt for subject lines.
|
||||||
|
|
||||||
When pcre2test is built, a configuration option can specify that it
|
When pcre2test is built, a configuration option can specify that it
|
||||||
should be linked with the libreadline or libedit library. When this is
|
should be linked with the libreadline or libedit library. When this is
|
||||||
done, if the input is from a terminal, it is read using the readline()
|
done, if the input is from a terminal, it is read using the readline()
|
||||||
function. This provides line-editing and history facilities. The output
|
function. This provides line-editing and history facilities. The output
|
||||||
from the -help option states whether or not readline() will be used.
|
from the -help option states whether or not readline() will be used.
|
||||||
|
|
||||||
The program handles any number of tests, each of which consists of a
|
The program handles any number of tests, each of which consists of a
|
||||||
set of input lines. Each set starts with a regular expression pattern,
|
set of input lines. Each set starts with a regular expression pattern,
|
||||||
followed by any number of subject lines to be matched against that pat-
|
followed by any number of subject lines to be matched against that pat-
|
||||||
tern. In between sets of test data, command lines that begin with a
|
tern. In between sets of test data, command lines that begin with a
|
||||||
hash (#) character may appear. This file format, with some restric-
|
hash (#) character may appear. This file format, with some restric-
|
||||||
tions, can also be processed by the perltest.pl script that is distrib-
|
tions, can also be processed by the perltest.pl script that is distrib-
|
||||||
uted with PCRE2 as a means of checking that the behaviour of PCRE2 and
|
uted with PCRE2 as a means of checking that the behaviour of PCRE2 and
|
||||||
Perl is the same.
|
Perl is the same.
|
||||||
|
|
||||||
Each subject line is matched separately and independently. If you want
|
Each subject line is matched separately and independently. If you want
|
||||||
to do multi-line matches, you have to use the \n escape sequence (or \r
|
to do multi-line matches, you have to use the \n escape sequence (or \r
|
||||||
or \r\n, etc., depending on the newline setting) in a single line of
|
or \r\n, etc., depending on the newline setting) in a single line of
|
||||||
input to encode the newline sequences. There is no limit on the length
|
input to encode the newline sequences. There is no limit on the length
|
||||||
of subject lines; the input buffer is automatically extended if it is
|
of subject lines; the input buffer is automatically extended if it is
|
||||||
too small. There is a replication feature that makes it possible to
|
too small. There is a replication feature that makes it possible to
|
||||||
generate long subject lines without having to supply them explicitly.
|
generate long subject lines without having to supply them explicitly.
|
||||||
|
|
||||||
An empty line or the end of the file signals the end of the subject
|
An empty line or the end of the file signals the end of the subject
|
||||||
lines for a test, at which point a new pattern or command line is
|
lines for a test, at which point a new pattern or command line is
|
||||||
expected if there is still input to be read.
|
expected if there is still input to be read.
|
||||||
|
|
||||||
|
|
||||||
COMMAND LINES
|
COMMAND LINES
|
||||||
|
|
||||||
In between sets of test data, a line that begins with a hash (#) char-
|
In between sets of test data, a line that begins with a hash (#) char-
|
||||||
acter is interpreted as a command line. If the first character is fol-
|
acter is interpreted as a command line. If the first character is fol-
|
||||||
lowed by white space or an exclamation mark, the line is treated as a
|
lowed by white space or an exclamation mark, the line is treated as a
|
||||||
comment, and ignored. Otherwise, the following commands are recog-
|
comment, and ignored. Otherwise, the following commands are recog-
|
||||||
nized:
|
nized:
|
||||||
|
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
|
|
||||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and
|
Subsequent patterns automatically have the PCRE2_NEVER_UTF and
|
||||||
PCRE2_NEVER_UCP options set, which locks out the use of UTF and Unicode
|
PCRE2_NEVER_UCP options set, which locks out the use of UTF and Unicode
|
||||||
property features. This is a trigger guard that is used in test files
|
property features. This is a trigger guard that is used in test files
|
||||||
to ensure that UTF/Unicode tests are not accidentally added to files
|
to ensure that UTF/Unicode tests are not accidentally added to files
|
||||||
that are used when UTF support is not included in the library. This
|
that are used when UTF support is not included in the library. This
|
||||||
effect can also be obtained by the use of #pattern; the difference is
|
effect can also be obtained by the use of #pattern; the difference is
|
||||||
that #forbid_utf cannot be unset, and the automatic options are not
|
that #forbid_utf cannot be unset, and the automatic options are not
|
||||||
displayed in pattern information, to avoid cluttering up test output.
|
displayed in pattern information, to avoid cluttering up test output.
|
||||||
|
|
||||||
#pattern <modifier-list>
|
#pattern <modifier-list>
|
||||||
|
|
||||||
This command sets a default modifier list that applies to all subse-
|
This command sets a default modifier list that applies to all subse-
|
||||||
quent patterns. Modifiers on a pattern can change these settings.
|
quent patterns. Modifiers on a pattern can change these settings.
|
||||||
|
|
||||||
#perltest
|
#perltest
|
||||||
|
|
||||||
The appearance of this line causes all subsequent modifier settings to
|
The appearance of this line causes all subsequent modifier settings to
|
||||||
be checked for compatibility with the perltest.pl script, which is used
|
be checked for compatibility with the perltest.pl script, which is used
|
||||||
to confirm that Perl gives the same results as PCRE2. Also, apart from
|
to confirm that Perl gives the same results as PCRE2. Also, apart from
|
||||||
comment lines, none of the other command lines are permitted, because
|
comment lines, none of the other command lines are permitted, because
|
||||||
they and many of the modifiers are specific to pcre2test, and should
|
they and many of the modifiers are specific to pcre2test, and should
|
||||||
not be used in test files that are also processed by perltest.pl. The
|
not be used in test files that are also processed by perltest.pl. The
|
||||||
#perltest command helps detect tests that are accidentally put in the
|
#perltest command helps detect tests that are accidentally put in the
|
||||||
wrong file.
|
wrong file.
|
||||||
|
|
||||||
#subject <modifier-list>
|
#subject <modifier-list>
|
||||||
|
|
||||||
This command sets a default modifier list that applies to all subse-
|
This command sets a default modifier list that applies to all subse-
|
||||||
quent subject lines. Modifiers on a subject line can change these set-
|
quent subject lines. Modifiers on a subject line can change these set-
|
||||||
tings.
|
tings.
|
||||||
|
|
||||||
|
|
||||||
MODIFIER SYNTAX
|
MODIFIER SYNTAX
|
||||||
|
|
||||||
Modifier lists are used with both pattern and subject lines. Items in a
|
Modifier lists are used with both pattern and subject lines. Items in a
|
||||||
list are separated by commas and optional white space. Some modifiers
|
list are separated by commas and optional white space. Some modifiers
|
||||||
may be given for both patterns and subject lines, whereas others are
|
may be given for both patterns and subject lines, whereas others are
|
||||||
valid for one or the other only. Each modifier has a long name, for
|
valid for one or the other only. Each modifier has a long name, for
|
||||||
example "anchored", and some of them must be followed by an equals sign
|
example "anchored", and some of them must be followed by an equals sign
|
||||||
and a value, for example, "offset=12". Modifiers that do not take val-
|
and a value, for example, "offset=12". Modifiers that do not take val-
|
||||||
ues may be preceded by a minus sign to turn off a previous default set-
|
ues may be preceded by a minus sign to turn off a previous default set-
|
||||||
ting.
|
ting.
|
||||||
|
|
||||||
A few of the more common modifiers can also be specified as single let-
|
A few of the more common modifiers can also be specified as single let-
|
||||||
ters, for example "i" for "caseless". In documentation, following the
|
ters, for example "i" for "caseless". In documentation, following the
|
||||||
Perl convention, these are written with a slash ("the /i modifier") for
|
Perl convention, these are written with a slash ("the /i modifier") for
|
||||||
clarity. Abbreviated modifiers must all be concatenated in the first
|
clarity. Abbreviated modifiers must all be concatenated in the first
|
||||||
item of a modifier list. If the first item is not recognized as a long
|
item of a modifier list. If the first item is not recognized as a long
|
||||||
modifier name, it is interpreted as a sequence of these abbreviations.
|
modifier name, it is interpreted as a sequence of these abbreviations.
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
/abc/ig,newline=cr,jit=3
|
/abc/ig,newline=cr,jit=3
|
||||||
|
|
||||||
This is a pattern line whose modifier list starts with two one-letter
|
This is a pattern line whose modifier list starts with two one-letter
|
||||||
modifiers (/i and /g). The lower-case abbreviated modifiers are the
|
modifiers (/i and /g). The lower-case abbreviated modifiers are the
|
||||||
same as used in Perl.
|
same as used in Perl.
|
||||||
|
|
||||||
|
|
||||||
PATTERN SYNTAX
|
PATTERN SYNTAX
|
||||||
|
|
||||||
A pattern line must start with one of the following characters (common
|
A pattern line must start with one of the following characters (common
|
||||||
symbols, excluding pattern meta-characters):
|
symbols, excluding pattern meta-characters):
|
||||||
|
|
||||||
/ ! " ' ` - = _ : ; , % & @ ~
|
/ ! " ' ` - = _ : ; , % & @ ~
|
||||||
|
|
||||||
This is interpreted as the pattern's delimiter. A regular expression
|
This is interpreted as the pattern's delimiter. A regular expression
|
||||||
may be continued over several input lines, in which case the newline
|
may be continued over several input lines, in which case the newline
|
||||||
characters are included within it. It is possible to include the delim-
|
characters are included within it. It is possible to include the delim-
|
||||||
iter within the pattern by escaping it with a backslash, for example
|
iter within the pattern by escaping it with a backslash, for example
|
||||||
|
|
||||||
/abc\/def/
|
/abc\/def/
|
||||||
|
|
||||||
If you do this, the escape and the delimiter form part of the pattern,
|
If you do this, the escape and the delimiter form part of the pattern,
|
||||||
but since the delimiters are all non-alphanumeric, this does not affect
|
but since the delimiters are all non-alphanumeric, this does not affect
|
||||||
its interpretation. If the terminating delimiter is immediately fol-
|
its interpretation. If the terminating delimiter is immediately fol-
|
||||||
lowed by a backslash, for example,
|
lowed by a backslash, for example,
|
||||||
|
|
||||||
/abc/\
|
/abc/\
|
||||||
|
|
||||||
then a backslash is added to the end of the pattern. This is done to
|
then a backslash is added to the end of the pattern. This is done to
|
||||||
provide a way of testing the error condition that arises if a pattern
|
provide a way of testing the error condition that arises if a pattern
|
||||||
finishes with a backslash, because
|
finishes with a backslash, because
|
||||||
|
|
||||||
/abc\/
|
/abc\/
|
||||||
|
|
||||||
is interpreted as the first line of a pattern that starts with "abc/",
|
is interpreted as the first line of a pattern that starts with "abc/",
|
||||||
causing pcre2test to read the next line as a continuation of the regu-
|
causing pcre2test to read the next line as a continuation of the regu-
|
||||||
lar expression.
|
lar expression.
|
||||||
|
|
||||||
A pattern can be followed by a modifier list (details below).
|
A pattern can be followed by a modifier list (details below).
|
||||||
|
@ -307,7 +308,7 @@ PATTERN SYNTAX
|
||||||
|
|
||||||
SUBJECT LINE SYNTAX
|
SUBJECT LINE SYNTAX
|
||||||
|
|
||||||
Before each subject line is passed to pcre2_match() or
|
Before each subject line is passed to pcre2_match() or
|
||||||
pcre2_dfa_match(), leading and trailing white space is removed, and the
|
pcre2_dfa_match(), leading and trailing white space is removed, and the
|
||||||
line is scanned for backslash escapes. The following provide a means of
|
line is scanned for backslash escapes. The following provide a means of
|
||||||
encoding non-printing characters in a visible way:
|
encoding non-printing characters in a visible way:
|
||||||
|
@ -327,23 +328,23 @@ SUBJECT LINE SYNTAX
|
||||||
\x{hh...} hexadecimal character (any number of hex digits)
|
\x{hh...} hexadecimal character (any number of hex digits)
|
||||||
|
|
||||||
The use of \x{hh...} is not dependent on the use of the utf modifier on
|
The use of \x{hh...} is not dependent on the use of the utf modifier on
|
||||||
the pattern. It is recognized always. There may be any number of hexa-
|
the pattern. It is recognized always. There may be any number of hexa-
|
||||||
decimal digits inside the braces; invalid values provoke error mes-
|
decimal digits inside the braces; invalid values provoke error mes-
|
||||||
sages.
|
sages.
|
||||||
|
|
||||||
Note that \xhh specifies one byte rather than one character in UTF-8
|
Note that \xhh specifies one byte rather than one character in UTF-8
|
||||||
mode; this makes it possible to construct invalid UTF-8 sequences for
|
mode; this makes it possible to construct invalid UTF-8 sequences for
|
||||||
testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
|
testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
|
||||||
character in UTF-8 mode, generating more than one byte if the value is
|
character in UTF-8 mode, generating more than one byte if the value is
|
||||||
greater than 127. When testing the 8-bit library not in UTF-8 mode,
|
greater than 127. When testing the 8-bit library not in UTF-8 mode,
|
||||||
\x{hh} generates one byte for values less than 256, and causes an error
|
\x{hh} generates one byte for values less than 256, and causes an error
|
||||||
for greater values.
|
for greater values.
|
||||||
|
|
||||||
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
|
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
|
||||||
possible to construct invalid UTF-16 sequences for testing purposes.
|
possible to construct invalid UTF-16 sequences for testing purposes.
|
||||||
|
|
||||||
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
|
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
|
||||||
makes it possible to construct invalid UTF-32 sequences for testing
|
makes it possible to construct invalid UTF-32 sequences for testing
|
||||||
purposes.
|
purposes.
|
||||||
|
|
||||||
There is a special backslash sequence that specifies replication of one
|
There is a special backslash sequence that specifies replication of one
|
||||||
|
@ -351,38 +352,38 @@ SUBJECT LINE SYNTAX
|
||||||
|
|
||||||
\[<characters>]{<count>}
|
\[<characters>]{<count>}
|
||||||
|
|
||||||
This makes it possible to test long strings without having to provide
|
This makes it possible to test long strings without having to provide
|
||||||
them as part of the file. For example:
|
them as part of the file. For example:
|
||||||
|
|
||||||
\[abc]{4}
|
\[abc]{4}
|
||||||
|
|
||||||
is converted to "abcabcabcabc". This feature does not support nesting.
|
is converted to "abcabcabcabc". This feature does not support nesting.
|
||||||
To include a closing square bracket in the characters, code it as \x5D.
|
To include a closing square bracket in the characters, code it as \x5D.
|
||||||
|
|
||||||
A backslash followed by an equals sign marks the end of the subject
|
A backslash followed by an equals sign marks the end of the subject
|
||||||
string and the start of a modifier list. For example:
|
string and the start of a modifier list. For example:
|
||||||
|
|
||||||
abc\=notbol,notempty
|
abc\=notbol,notempty
|
||||||
|
|
||||||
A backslash followed by any other non-alphanumeric character just
|
A backslash followed by any other non-alphanumeric character just
|
||||||
escapes that character. A backslash followed by anything else causes an
|
escapes that character. A backslash followed by anything else causes an
|
||||||
error. However, if the very last character in the line is a backslash
|
error. However, if the very last character in the line is a backslash
|
||||||
(and there is no modifier list), it is ignored. This gives a way of
|
(and there is no modifier list), it is ignored. This gives a way of
|
||||||
passing an empty line as data, since a real empty line terminates the
|
passing an empty line as data, since a real empty line terminates the
|
||||||
data input.
|
data input.
|
||||||
|
|
||||||
|
|
||||||
PATTERN MODIFIERS
|
PATTERN MODIFIERS
|
||||||
|
|
||||||
There are three types of modifier that can appear in pattern lines, two
|
There are three types of modifier that can appear in pattern lines, two
|
||||||
of which may also be used in a #pattern command. A pattern's modifier
|
of which may also be used in a #pattern command. A pattern's modifier
|
||||||
list can add to or override default modifiers that were set by a previ-
|
list can add to or override default modifiers that were set by a previ-
|
||||||
ous #pattern command.
|
ous #pattern command.
|
||||||
|
|
||||||
Setting compilation options
|
Setting compilation options
|
||||||
|
|
||||||
The following modifiers set options for pcre2_compile(). The most com-
|
The following modifiers set options for pcre2_compile(). The most com-
|
||||||
mon ones have single-letter abbreviations. See pcre2api for a descrip-
|
mon ones have single-letter abbreviations. See pcre2api for a descrip-
|
||||||
tion of their effects.
|
tion of their effects.
|
||||||
|
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
|
@ -408,13 +409,13 @@ PATTERN MODIFIERS
|
||||||
utf set PCRE2_UTF
|
utf set PCRE2_UTF
|
||||||
|
|
||||||
As well as turning on the PCRE2_UTF option, the utf modifier causes all
|
As well as turning on the PCRE2_UTF option, the utf modifier causes all
|
||||||
non-printing characters in output strings to be printed using the
|
non-printing characters in output strings to be printed using the
|
||||||
\x{hh...} notation. Otherwise, those less than 0x100 are output in hex
|
\x{hh...} notation. Otherwise, those less than 0x100 are output in hex
|
||||||
without the curly brackets.
|
without the curly brackets.
|
||||||
|
|
||||||
Setting compilation controls
|
Setting compilation controls
|
||||||
|
|
||||||
The following modifiers affect the compilation process or request
|
The following modifiers affect the compilation process or request
|
||||||
information about the pattern:
|
information about the pattern:
|
||||||
|
|
||||||
bsr=[anycrlf|unicode] specify \R handling
|
bsr=[anycrlf|unicode] specify \R handling
|
||||||
|
@ -424,6 +425,7 @@ PATTERN MODIFIERS
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
hex pattern is coded in hexadecimal
|
hex pattern is coded in hexadecimal
|
||||||
jit[=<number>] use JIT
|
jit[=<number>] use JIT
|
||||||
|
jitfast use JIT fast path
|
||||||
jitverify verify JIT use
|
jitverify verify JIT use
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
|
@ -440,55 +442,55 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Newline and \R handling
|
Newline and \R handling
|
||||||
|
|
||||||
The bsr modifier specifies what \R in a pattern should match. If it is
|
The bsr modifier specifies what \R in a pattern should match. If it is
|
||||||
set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
|
set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
|
||||||
"unicode", \R matches any Unicode newline sequence. The default is
|
"unicode", \R matches any Unicode newline sequence. The default is
|
||||||
specified when PCRE2 is built, with the default default being Unicode.
|
specified when PCRE2 is built, with the default default being Unicode.
|
||||||
|
|
||||||
The newline modifier specifies which characters are to be interpreted
|
The newline modifier specifies which characters are to be interpreted
|
||||||
as newlines, both in the pattern and (by default) in subject lines. The
|
as newlines, both in the pattern and (by default) in subject lines. The
|
||||||
type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||||
|
|
||||||
Information about a pattern
|
Information about a pattern
|
||||||
|
|
||||||
The debug modifier is a shorthand for info,fullbincode, requesting all
|
The debug modifier is a shorthand for info,fullbincode, requesting all
|
||||||
available information.
|
available information.
|
||||||
|
|
||||||
The bincode modifier causes a representation of the compiled code to be
|
The bincode modifier causes a representation of the compiled code to be
|
||||||
output after compilation. This information does not contain length and
|
output after compilation. This information does not contain length and
|
||||||
offset values, which ensures that the same output is generated for dif-
|
offset values, which ensures that the same output is generated for dif-
|
||||||
ferent internal link sizes and different code unit widths. By using
|
ferent internal link sizes and different code unit widths. By using
|
||||||
bincode, the same regression tests can be used in different environ-
|
bincode, the same regression tests can be used in different environ-
|
||||||
ments.
|
ments.
|
||||||
|
|
||||||
The fullbincode modifier, by contrast, does include length and offset
|
The fullbincode modifier, by contrast, does include length and offset
|
||||||
values. This is used in a few special tests and is also useful for one-
|
values. This is used in a few special tests and is also useful for one-
|
||||||
off tests.
|
off tests.
|
||||||
|
|
||||||
The info modifier requests information about the compiled pattern
|
The info modifier requests information about the compiled pattern
|
||||||
(whether it is anchored, has a fixed first character, and so on). The
|
(whether it is anchored, has a fixed first character, and so on). The
|
||||||
information is obtained from the pcre2_pattern_info() function.
|
information is obtained from the pcre2_pattern_info() function.
|
||||||
|
|
||||||
Specifying a pattern in hex
|
Specifying a pattern in hex
|
||||||
|
|
||||||
The hex modifier specifies that the characters of the pattern are to be
|
The hex modifier specifies that the characters of the pattern are to be
|
||||||
interpreted as pairs of hexadecimal digits. White space is permitted
|
interpreted as pairs of hexadecimal digits. White space is permitted
|
||||||
between pairs. For example:
|
between pairs. For example:
|
||||||
|
|
||||||
/ab 32 59/hex
|
/ab 32 59/hex
|
||||||
|
|
||||||
This feature is provided as a way of creating patterns that contain
|
This feature is provided as a way of creating patterns that contain
|
||||||
binary zero characters. When hex is set, it implies use_length.
|
binary zero characters. When hex is set, it implies use_length.
|
||||||
|
|
||||||
Using the pattern's length
|
Using the pattern's length
|
||||||
|
|
||||||
By default, pcre2test passes patterns as zero-terminated strings to
|
By default, pcre2test passes patterns as zero-terminated strings to
|
||||||
pcre2_compile(), giving the length as -1. If use_length is set, the
|
pcre2_compile(), giving the length as -1. If use_length is set, the
|
||||||
length of the pattern is passed. This is implied if hex is set.
|
length of the pattern is passed. This is implied if hex is set.
|
||||||
|
|
||||||
JIT compilation
|
JIT compilation
|
||||||
|
|
||||||
The /jit modifier may optionally be followed by an equals sign and a
|
The /jit modifier may optionally be followed by an equals sign and a
|
||||||
number in the range 0 to 7:
|
number in the range 0 to 7:
|
||||||
|
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
|
@ -499,17 +501,23 @@ PATTERN MODIFIERS
|
||||||
6 use JIT for soft and hard partial match
|
6 use JIT for soft and hard partial match
|
||||||
7 all three modes
|
7 all three modes
|
||||||
|
|
||||||
If no number is given, 7 is assumed. If JIT compilation is successful,
|
If no number is given, 7 is assumed. If JIT compilation is successful,
|
||||||
the compiled JIT code will automatically be used when pcre2_match() is
|
the compiled JIT code will automatically be used when pcre2_match() is
|
||||||
run for the appropriate type of match, except when incompatible run-
|
run for the appropriate type of match, except when incompatible run-
|
||||||
time options are specified. For more details, see the pcre2jit documen-
|
time options are specified. For more details, see the pcre2jit documen-
|
||||||
tation. See also the jitstack modifier below for a way of setting the
|
tation. See also the jitstack modifier below for a way of setting the
|
||||||
size of the JIT stack.
|
size of the JIT stack.
|
||||||
|
|
||||||
If the jitverify modifier is specified, information about the compiled
|
If the jitfast modifier is specified, matching is done using the JIT
|
||||||
pattern shows whether JIT compilation was or was not successful. If
|
"fast path" interface (pcre2_jit_match()), which skips some of the san-
|
||||||
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
ity checks that are done by pcre2_match(), and of course does not work
|
||||||
tion is successful when jitverify is set, the text "(JIT)" is added to
|
when JIT is not supported. If jitfast is specified without jit, jit=7
|
||||||
|
is assumed.
|
||||||
|
|
||||||
|
If the jitverify modifier is specified, information about the compiled
|
||||||
|
pattern shows whether JIT compilation was or was not successful. If
|
||||||
|
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
||||||
|
tion is successful when jitverify is set, the text "(JIT)" is added to
|
||||||
the first output line after a match or non match when JIT-compiled code
|
the first output line after a match or non match when JIT-compiled code
|
||||||
was actually used.
|
was actually used.
|
||||||
|
|
||||||
|
@ -520,31 +528,33 @@ PATTERN MODIFIERS
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
|
|
||||||
The given locale is set, pcre2_maketables() is called to build a set of
|
The given locale is set, pcre2_maketables() is called to build a set of
|
||||||
character tables for the locale, and this is then passed to pcre2_com-
|
character tables for the locale, and this is then passed to pcre2_com-
|
||||||
pile() when compiling the regular expression. The same tables are used
|
pile() when compiling the regular expression. The same tables are used
|
||||||
when matching the following subject lines. The /locale modifier applies
|
when matching the following subject lines. The /locale modifier applies
|
||||||
only to the pattern on which it appears, but can be given in a #pattern
|
only to the pattern on which it appears, but can be given in a #pattern
|
||||||
command if a default is needed. Setting a locale and alternate charac-
|
command if a default is needed. Setting a locale and alternate charac-
|
||||||
ter tables are mutually exclusive.
|
ter tables are mutually exclusive.
|
||||||
|
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
|
|
||||||
The /memory modifier causes the size in bytes of the memory block used
|
The /memory modifier causes the size in bytes of the memory block used
|
||||||
to hold the compiled pattern to be output. This does not include the
|
to hold the compiled pattern to be output. This does not include the
|
||||||
size of the pcre2_code block; it is just the actual compiled data. If
|
size of the pcre2_code block; it is just the actual compiled data. If
|
||||||
the pattern is subsequently passed to the JIT compiler, the size of the
|
the pattern is subsequently passed to the JIT compiler, the size of the
|
||||||
JIT compiled code is also output.
|
JIT compiled code is also output.
|
||||||
|
|
||||||
Limiting nested parentheses
|
Limiting nested parentheses
|
||||||
|
|
||||||
The parens_nest_limit modifier sets a limit on the depth of nested
|
The parens_nest_limit modifier sets a limit on the depth of nested
|
||||||
parentheses in a pattern. Breaching the limit causes a compilation
|
parentheses in a pattern. Breaching the limit causes a compilation
|
||||||
error.
|
error. The default for the library is set when PCRE2 is built, but
|
||||||
|
pcre2test sets its own default of 220, which is required for running
|
||||||
|
the standard test suite.
|
||||||
|
|
||||||
Using the POSIX wrapper API
|
Using the POSIX wrapper API
|
||||||
|
|
||||||
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
||||||
per API rather than its native API. This supports only the 8-bit
|
per API rather than its native API. This supports only the 8-bit
|
||||||
library. When the POSIX API is being used, the following pattern modi-
|
library. When the POSIX API is being used, the following pattern modi-
|
||||||
fiers set options for the regcomp() function:
|
fiers set options for the regcomp() function:
|
||||||
|
|
||||||
|
@ -556,25 +566,25 @@ PATTERN MODIFIERS
|
||||||
ucp REG_UCP ) the POSIX standard
|
ucp REG_UCP ) the POSIX standard
|
||||||
utf REG_UTF8 )
|
utf REG_UTF8 )
|
||||||
|
|
||||||
The aftertext and allaftertext subject modifiers work as described
|
The aftertext and allaftertext subject modifiers work as described
|
||||||
below. All other modifiers cause an error.
|
below. All other modifiers cause an error.
|
||||||
|
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
|
|
||||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
The /stackguard modifier is used to test the use of pcre2_set_com-
|
||||||
pile_recursion_guard(), a function that is provided to enable stack
|
pile_recursion_guard(), a function that is provided to enable stack
|
||||||
availability to be checked during compilation (see the pcre2api docu-
|
availability to be checked during compilation (see the pcre2api docu-
|
||||||
mentation for details). If the number specified by the modifier is
|
mentation for details). If the number specified by the modifier is
|
||||||
greater than zero, pcre2_set_compile_recursion_guard() is called to set
|
greater than zero, pcre2_set_compile_recursion_guard() is called to set
|
||||||
up a callback from pcre2_compile() to a local function. The argument it
|
up a callback from pcre2_compile() to a local function. The argument it
|
||||||
is passed is the current nesting parenthesis depth; if this is greater
|
is passed is the current nesting parenthesis depth; if this is greater
|
||||||
than the value given by the modifier, non-zero is returned, causing the
|
than the value given by the modifier, non-zero is returned, causing the
|
||||||
compilation to be aborted.
|
compilation to be aborted.
|
||||||
|
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
|
|
||||||
The /tables modifier must be followed by a single digit. It causes a
|
The /tables modifier must be followed by a single digit. It causes a
|
||||||
specific set of built-in character tables to be passed to pcre2_com-
|
specific set of built-in character tables to be passed to pcre2_com-
|
||||||
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
||||||
ent character tables. The digit specifies the tables as follows:
|
ent character tables. The digit specifies the tables as follows:
|
||||||
|
|
||||||
|
@ -583,25 +593,26 @@ PATTERN MODIFIERS
|
||||||
pcre2_chartables.c.dist
|
pcre2_chartables.c.dist
|
||||||
2 a set of tables defining ISO 8859 characters
|
2 a set of tables defining ISO 8859 characters
|
||||||
|
|
||||||
In table 2, some characters whose codes are greater than 128 are iden-
|
In table 2, some characters whose codes are greater than 128 are iden-
|
||||||
tified as letters, digits, spaces, etc. Setting alternate character
|
tified as letters, digits, spaces, etc. Setting alternate character
|
||||||
tables and a locale are mutually exclusive.
|
tables and a locale are mutually exclusive.
|
||||||
|
|
||||||
Setting certain match controls
|
Setting certain match controls
|
||||||
|
|
||||||
The following modifiers are really subject modifiers, and are described
|
The following modifiers are really subject modifiers, and are described
|
||||||
below. However, they may be included in a pattern's modifier list, in
|
below. However, they may be included in a pattern's modifier list, in
|
||||||
which case they are applied to every subject line that is processed
|
which case they are applied to every subject line that is processed
|
||||||
with that pattern. They do not affect the compilation process.
|
with that pattern. They do not affect the compilation process.
|
||||||
|
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
/g global global matching
|
/g global global matching
|
||||||
mark show mark values
|
mark show mark values
|
||||||
|
startchar show starting character when relevant
|
||||||
|
|
||||||
These modifiers may not appear in a #pattern command. If you want them
|
These modifiers may not appear in a #pattern command. If you want them
|
||||||
as defaults, set them in a #subject command.
|
as defaults, set them in a #subject command.
|
||||||
|
|
||||||
|
|
||||||
|
@ -612,7 +623,7 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
Setting match options
|
Setting match options
|
||||||
|
|
||||||
The following modifiers set options for pcre2_match() or
|
The following modifiers set options for pcre2_match() or
|
||||||
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
||||||
|
|
||||||
anchored set PCRE2_ANCHORED
|
anchored set PCRE2_ANCHORED
|
||||||
|
@ -626,20 +637,20 @@ SUBJECT MODIFIERS
|
||||||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||||
|
|
||||||
The partial matching modifiers are provided with abbreviations because
|
The partial matching modifiers are provided with abbreviations because
|
||||||
they appear frequently in tests.
|
they appear frequently in tests.
|
||||||
|
|
||||||
If the /posix modifier was present on the pattern, causing the POSIX
|
If the /posix modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any
|
wrapper API to be used, the only option-setting modifiers that have any
|
||||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||||
Any other modifiers cause an error.
|
Any other modifiers cause an error.
|
||||||
|
|
||||||
Setting match controls
|
Setting match controls
|
||||||
|
|
||||||
The following modifiers affect the matching process or request addi-
|
The following modifiers affect the matching process or request addi-
|
||||||
tional information. Some of them may also be specified on a pattern
|
tional information. Some of them may also be specified on a pattern
|
||||||
line (see above), in which case they apply to every subject line that
|
line (see above), in which case they apply to every subject line that
|
||||||
is matched against that pattern.
|
is matched against that pattern.
|
||||||
|
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
|
@ -664,39 +675,56 @@ SUBJECT MODIFIERS
|
||||||
offset=<n> set starting offset
|
offset=<n> set starting offset
|
||||||
ovector=<n> set size of output vector
|
ovector=<n> set size of output vector
|
||||||
recursion_limit=<n> set a recursion limit
|
recursion_limit=<n> set a recursion limit
|
||||||
|
startchar show startchar when relevant
|
||||||
|
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
FIXME: Give more examples.
|
FIXME: Give more examples.
|
||||||
|
|
||||||
Showing more text
|
Showing more text
|
||||||
|
|
||||||
The aftertext modifier requests that as well as outputting the sub-
|
The aftertext modifier requests that as well as outputting the sub-
|
||||||
string that matched the entire pattern, pcre2test should in addition
|
string that matched the entire pattern, pcre2test should in addition
|
||||||
output the remainder of the subject string. This is useful for tests
|
output the remainder of the subject string. This is useful for tests
|
||||||
where the subject contains multiple copies of the same substring. The
|
where the subject contains multiple copies of the same substring. The
|
||||||
allaftertext modifier requests the same action for captured substrings
|
allaftertext modifier requests the same action for captured substrings
|
||||||
as well as the main matched substring. In each case the remainder is
|
as well as the main matched substring. In each case the remainder is
|
||||||
output on the following line with a plus character following the cap-
|
output on the following line with a plus character following the cap-
|
||||||
ture number.
|
ture number.
|
||||||
|
|
||||||
The allusedtext modifier requests that all the text that was consulted
|
The allusedtext modifier requests that all the text that was consulted
|
||||||
during a successful pattern match by the interpreter should be shown.
|
during a successful pattern match by the interpreter should be shown.
|
||||||
This feature is not supported for JIT matching, and if requested with
|
This feature is not supported for JIT matching, and if requested with
|
||||||
JIT it is ignored (with a warning message). Setting this modifier
|
JIT it is ignored (with a warning message). Setting this modifier
|
||||||
affects the output if there is a lookbehind at the start of a match, or
|
affects the output if there is a lookbehind at the start of a match, or
|
||||||
a lookahead at the end, or if \K is used in the pattern. Characters
|
a lookahead at the end, or if \K is used in the pattern. Characters
|
||||||
that precede or follow the start and end of the actual match are indi-
|
that precede or follow the start and end of the actual match are indi-
|
||||||
cated in the output by '<' or '>' characters underneath them. Here is
|
cated in the output by '<' or '>' characters underneath them. Here is
|
||||||
an example:
|
an example:
|
||||||
|
|
||||||
/(?<=pqr)abc(?=xyz)/
|
re> /(?<=pqr)abc(?=xyz)/
|
||||||
123pqrabcxyz456\=allusedtext
|
data> 123pqrabcxyz456\=allusedtext
|
||||||
0: pqrabcxyz
|
0: pqrabcxyz
|
||||||
<<< >>>
|
<<< >>>
|
||||||
|
|
||||||
This shows that the matched string is "abc", with the preceding and
|
This shows that the matched string is "abc", with the preceding and
|
||||||
following strings "pqr" and "xyz" also consulted during the match.
|
following strings "pqr" and "xyz" also consulted during the match.
|
||||||
|
|
||||||
|
The startchar modifier requests that the starting character for the
|
||||||
|
match be indicated, if it is different to the start of the matched
|
||||||
|
string. The only time when this occurs is when \K has been processed as
|
||||||
|
part of the match. In this situation, the output for the matched string
|
||||||
|
is displayed from the starting character instead of from the match
|
||||||
|
point, with circumflex characters under the earlier characters. For
|
||||||
|
example:
|
||||||
|
|
||||||
|
re> /abc\Kxyz/
|
||||||
|
data> abcxyz\=startchar
|
||||||
|
0: abcxyz
|
||||||
|
^^^
|
||||||
|
|
||||||
|
Unlike allusedtext, the startchar modifier can be used with JIT. How-
|
||||||
|
ever, these two modifiers are mutually exclusive.
|
||||||
|
|
||||||
Showing the value of all capture groups
|
Showing the value of all capture groups
|
||||||
|
|
||||||
The allcaptures modifier requests that the values of all potential cap-
|
The allcaptures modifier requests that the values of all potential cap-
|
||||||
|
@ -768,66 +796,70 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
The jitstack modifier provides a way of setting the maximum stack size
|
The jitstack modifier provides a way of setting the maximum stack size
|
||||||
that is used by the just-in-time optimization code. It is ignored if
|
that is used by the just-in-time optimization code. It is ignored if
|
||||||
JIT optimization is not being used. Providing a stack that is larger
|
JIT optimization is not being used. The value is a number of kilobytes.
|
||||||
than the default 32K is necessary only for very complicated patterns.
|
Providing a stack that is larger than the default 32K is necessary only
|
||||||
|
for very complicated patterns.
|
||||||
|
|
||||||
Setting match and recursion limits
|
Setting match and recursion limits
|
||||||
|
|
||||||
The match_limit and recursion_limit modifiers set the appropriate lim-
|
The match_limit and recursion_limit modifiers set the appropriate lim-
|
||||||
its in the match context. These values are ignored when the find_limits
|
its in the match context. These values are ignored when the find_limits
|
||||||
modifier is specified.
|
modifier is specified.
|
||||||
|
|
||||||
Finding minimum limits
|
Finding minimum limits
|
||||||
|
|
||||||
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
||||||
several times, setting different values in the match context via
|
several times, setting different values in the match context via
|
||||||
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
||||||
the minimum values for each parameter that allow pcre2_match() to com-
|
the minimum values for each parameter that allow pcre2_match() to com-
|
||||||
plete without error.
|
plete without error.
|
||||||
|
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching
|
If JIT is being used, only the match limit is relevant. If DFA matching
|
||||||
is being used, neither limit is relevant, and this modifier is ignored
|
is being used, neither limit is relevant, and this modifier is ignored
|
||||||
(with a warning message).
|
(with a warning message).
|
||||||
|
|
||||||
The match_limit number is a measure of the amount of backtracking that
|
The match_limit number is a measure of the amount of backtracking that
|
||||||
takes place, and learning the minimum value can be instructive. For
|
takes place, and learning the minimum value can be instructive. For
|
||||||
most simple matches, the number is quite small, but for patterns with
|
most simple matches, the number is quite small, but for patterns with
|
||||||
very large numbers of matching possibilities, it can become large very
|
very large numbers of matching possibilities, it can become large very
|
||||||
quickly with increasing length of subject string. The
|
quickly with increasing length of subject string. The
|
||||||
match_limit_recursion number is a measure of how much stack (or, if
|
match_limit_recursion number is a measure of how much stack (or, if
|
||||||
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
||||||
complete the match attempt.
|
complete the match attempt.
|
||||||
|
|
||||||
Showing MARK names
|
Showing MARK names
|
||||||
|
|
||||||
|
|
||||||
The mark modifier causes the names from backtracking control verbs that
|
The mark modifier causes the names from backtracking control verbs that
|
||||||
are returned from calls to pcre2_match() to be displayed. If a mark is
|
are returned from calls to pcre2_match() to be displayed. If a mark is
|
||||||
returned for a match, non-match, or partial match, pcre2test shows it.
|
returned for a match, non-match, or partial match, pcre2test shows it.
|
||||||
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
||||||
it is added to the non-match message.
|
it is added to the non-match message.
|
||||||
|
|
||||||
Showing memory usage
|
Showing memory usage
|
||||||
|
|
||||||
The memory modifier causes pcre2test to log all memory allocation and
|
The memory modifier causes pcre2test to log all memory allocation and
|
||||||
freeing calls that occur during a match operation.
|
freeing calls that occur during a match operation.
|
||||||
|
|
||||||
Setting a starting offset
|
Setting a starting offset
|
||||||
|
|
||||||
The offset modifier sets an offset in the subject string at which
|
The offset modifier sets an offset in the subject string at which
|
||||||
matching starts. Its value is a number of code units, not characters.
|
matching starts. Its value is a number of code units, not characters.
|
||||||
|
|
||||||
Setting the size of the output vector
|
Setting the size of the output vector
|
||||||
|
|
||||||
The ovector modifier applies only to the subject line in which it
|
The ovector modifier applies only to the subject line in which it
|
||||||
appears, though of course it can also be used to set a default in a
|
appears, though of course it can also be used to set a default in a
|
||||||
#subject command. It specifies the number of pairs of offsets that are
|
#subject command. It specifies the number of pairs of offsets that are
|
||||||
available for storing matching information. The default is 15.
|
available for storing matching information. The default is 15.
|
||||||
|
|
||||||
At least one pair of offsets is always created by pcre2_match_data_cre-
|
A value of zero is useful when testing the POSIX API because it causes
|
||||||
ate(), for matching with PCRE2's native API, so a value of 0 is the
|
regexec() to be called with a NULL capture vector. When not testing the
|
||||||
same as 1. However a value of 0 is useful when testing the POSIX API
|
POSIX API, a value of zero is used to cause pcre2_match_data_cre-
|
||||||
because it causes regexec() to be called with a NULL capture vector.
|
ate_from_pattern() to be called, in order to create a match block of
|
||||||
|
exactly the right size for the pattern. (It is not possible to create a
|
||||||
|
match block with a zero-length ovector; there is always one pair of
|
||||||
|
offsets.)
|
||||||
|
|
||||||
|
|
||||||
THE ALTERNATIVE MATCHING FUNCTION
|
THE ALTERNATIVE MATCHING FUNCTION
|
||||||
|
@ -1058,8 +1090,8 @@ NON-PRINTING CHARACTERS
|
||||||
|
|
||||||
SEE ALSO
|
SEE ALSO
|
||||||
|
|
||||||
pcre2(3), pcre16(3), pcre32(3), pcre2api(3), pcre2callout(3), pcre2jit(3),
|
pcre2(3), pcre2api(3), pcre2callout(3), pcre2jit(3), pcre2matching(3),
|
||||||
pcre2matching(3), pcre2partial(3), pcre2pattern(3), pcre2precompile(3).
|
pcre2partial(3), pcre2pattern(3).
|
||||||
|
|
||||||
|
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
@ -1071,5 +1103,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 11 October 2014
|
Last updated: 02 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
|
|
24
perltest.sh
24
perltest.sh
|
@ -8,7 +8,7 @@
|
||||||
#
|
#
|
||||||
# The desired effect is achieved by making this a shell script that passes the
|
# The desired effect is achieved by making this a shell script that passes the
|
||||||
# Perl script to Perl through a pipe. If the first argument is "-utf8", a
|
# Perl script to Perl through a pipe. If the first argument is "-utf8", a
|
||||||
# suitable prefix is set up.
|
# suitable prefix is set up.
|
||||||
#
|
#
|
||||||
# The remaining arguments, if any, are passed to Perl. They are an input file
|
# The remaining arguments, if any, are passed to Perl. They are an input file
|
||||||
# and an output file. If there is one argument, the output is written to
|
# and an output file. If there is one argument, the output is written to
|
||||||
|
@ -20,7 +20,7 @@ perl=perl
|
||||||
prefix=''
|
prefix=''
|
||||||
if [ $# > 0 -a "$1" = "-utf8" ] ; then
|
if [ $# > 0 -a "$1" = "-utf8" ] ; then
|
||||||
prefix="use utf8; require Encode;"
|
prefix="use utf8; require Encode;"
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,23 +28,23 @@ fi
|
||||||
# can be given identical input, except that input patterns can be followed only
|
# can be given identical input, except that input patterns can be followed only
|
||||||
# by Perl's lower case modifiers and certain other pcre2test modifiers that are
|
# by Perl's lower case modifiers and certain other pcre2test modifiers that are
|
||||||
# either handled or ignored:
|
# either handled or ignored:
|
||||||
#
|
#
|
||||||
# aftertext interpreted as "print $' afterwards"
|
# aftertext interpreted as "print $' afterwards"
|
||||||
# afteralltext ignored
|
# afteralltext ignored
|
||||||
# dupnames ignored (Perl always allows)
|
# dupnames ignored (Perl always allows)
|
||||||
# mark ignored
|
# mark ignored
|
||||||
# no_auto_possess ignored
|
# no_auto_possess ignored
|
||||||
# no_start_optimize ignored
|
# no_start_optimize ignored
|
||||||
# ucp sets Perl's /u modifier
|
# ucp sets Perl's /u modifier
|
||||||
# utf invoke UTF-8 functionality
|
# utf invoke UTF-8 functionality
|
||||||
#
|
#
|
||||||
# The data lines must not have any pcre2test modifiers. They are processed as
|
# The data lines must not have any pcre2test modifiers. They are processed as
|
||||||
# Perl double-quoted strings, so if they contain " $ or @ characters, these
|
# Perl double-quoted strings, so if they contain " $ or @ characters, these
|
||||||
# have to be escaped. For this reason, all such characters in the
|
# have to be escaped. For this reason, all such characters in the
|
||||||
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
|
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
|
||||||
# be used for perltest as well as for pcre2test. The output from this script
|
# be used for perltest as well as for pcre2test. The output from this script
|
||||||
# should be the same as that from pcre2test, apart from the initial identifying banner.
|
# should be the same as that from pcre2test, apart from the initial identifying banner.
|
||||||
#
|
#
|
||||||
# The other testinput files are not suitable for feeding to perltest.sh,
|
# The other testinput files are not suitable for feeding to perltest.sh,
|
||||||
# because they make use of the special modifiers that pcre2test uses for
|
# because they make use of the special modifiers that pcre2test uses for
|
||||||
# testing features of PCRE2. Some of these files also contain malformed regular
|
# testing features of PCRE2. Some of these files also contain malformed regular
|
||||||
|
@ -90,11 +90,11 @@ if (@ARGV > 0)
|
||||||
$infile = "INFILE";
|
$infile = "INFILE";
|
||||||
$interact = 0;
|
$interact = 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
open(INFILE, "</dev/tty") || die "Failed to open /dev/tty\n";
|
open(INFILE, "</dev/tty") || die "Failed to open /dev/tty\n";
|
||||||
$infile = "INFILE";
|
$infile = "INFILE";
|
||||||
$interact = 1;
|
$interact = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (@ARGV > 1)
|
if (@ARGV > 1)
|
||||||
|
@ -291,5 +291,5 @@ for (;;)
|
||||||
|
|
||||||
PERLEND
|
PERLEND
|
||||||
) | $perl - $@
|
) | $perl - $@
|
||||||
|
|
||||||
# End
|
# End
|
||||||
|
|
|
@ -86,8 +86,7 @@ passed. Put these bits at the most significant end of the options word so
|
||||||
others can be added next to them */
|
others can be added next to them */
|
||||||
|
|
||||||
#define PCRE2_ANCHORED 0x80000000u
|
#define PCRE2_ANCHORED 0x80000000u
|
||||||
#define PCRE2_NO_START_OPTIMIZE 0x40000000u
|
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||||
#define PCRE2_NO_UTF_CHECK 0x20000000u
|
|
||||||
|
|
||||||
/* Other options that can be passed to pcre2_compile(). They may affect
|
/* Other options that can be passed to pcre2_compile(). They may affect
|
||||||
compilation, JIT compilation, and/or interpretive execution. The following tags
|
compilation, JIT compilation, and/or interpretive execution. The following tags
|
||||||
|
@ -95,7 +94,7 @@ indicate which:
|
||||||
|
|
||||||
C alters what is compiled
|
C alters what is compiled
|
||||||
J alters what JIT compiles
|
J alters what JIT compiles
|
||||||
E is inspected during pcre2_match() execution
|
M is inspected during pcre2_match() execution
|
||||||
D is inspected during pcre2_dfa_match() execution
|
D is inspected during pcre2_dfa_match() execution
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@ -103,24 +102,25 @@ D is inspected during pcre2_dfa_match() execution
|
||||||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J E D */
|
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||||
#define PCRE2_FIRSTLINE 0x00000100u /* J E D */
|
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J E */
|
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||||
#define PCRE2_UCP 0x00008000u /* C J E D */
|
#define PCRE2_NO_START_OPTIMIZE 0x00008000u /* J M D */
|
||||||
#define PCRE2_UNGREEDY 0x00010000u /* C */
|
#define PCRE2_UCP 0x00010000u /* C J M D */
|
||||||
#define PCRE2_UTF 0x00020000u /* C J E D */
|
#define PCRE2_UNGREEDY 0x00020000u /* C */
|
||||||
|
#define PCRE2_UTF 0x00040000u /* C J M D */
|
||||||
|
|
||||||
/* These are for pcre2_jit_compile(). */
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
#define PCRE2_JIT 0x00000001u /* For full matching */
|
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||||
|
|
||||||
|
@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
|
||||||
|
|
||||||
#define PCRE2_NOTBOL 0x00000001u
|
#define PCRE2_NOTBOL 0x00000001u
|
||||||
#define PCRE2_NOTEOL 0x00000002u
|
#define PCRE2_NOTEOL 0x00000002u
|
||||||
#define PCRE2_NOTEMPTY 0x00000004u
|
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u
|
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||||
|
|
||||||
|
@ -140,9 +140,9 @@ functions, so take care not to define synonyms by mistake. */
|
||||||
#define PCRE2_DFA_RESTART 0x00000040u
|
#define PCRE2_DFA_RESTART 0x00000040u
|
||||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||||
|
|
||||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||||
newline values must be kept in step with values set in config.h and both sets
|
must be kept in step with values set in config.h and both sets must all be
|
||||||
must all be greater than zero. */
|
greater than zero. */
|
||||||
|
|
||||||
#define PCRE2_NEWLINE_CR 1
|
#define PCRE2_NEWLINE_CR 1
|
||||||
#define PCRE2_NEWLINE_LF 2
|
#define PCRE2_NEWLINE_LF 2
|
||||||
|
@ -193,32 +193,33 @@ must all be greater than zero. */
|
||||||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||||
|
|
||||||
/* Error codes for pcre2[_dfa]_match() */
|
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
|
||||||
|
context functions. */
|
||||||
|
|
||||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
#define PCRE2_ERROR_BADDATA (-29)
|
||||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
#define PCRE2_ERROR_BADLENGTH (-30)
|
||||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
#define PCRE2_ERROR_BADMODE (-32)
|
||||||
#define PCRE2_ERROR_BADMODE (-33)
|
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
#define PCRE2_ERROR_BADOPTION (-34)
|
||||||
#define PCRE2_ERROR_BADOPTION (-35)
|
#define PCRE2_ERROR_BADUTFOFFSET (-35)
|
||||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
|
||||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
#define PCRE2_ERROR_DFA_BADRESTART (-37)
|
||||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
#define PCRE2_ERROR_DFA_RECURSE (-38)
|
||||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
#define PCRE2_ERROR_DFA_UCOND (-39)
|
||||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
#define PCRE2_ERROR_DFA_UITEM (-40)
|
||||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
#define PCRE2_ERROR_DFA_WSSIZE (-41)
|
||||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
#define PCRE2_ERROR_INTERNAL (-42)
|
||||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
#define PCRE2_ERROR_JIT_BADOPTION (-43)
|
||||||
#define PCRE2_ERROR_INTERNAL (-44)
|
#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
|
||||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
#define PCRE2_ERROR_MATCHLIMIT (-45)
|
||||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
#define PCRE2_ERROR_NOMEMORY (-46)
|
||||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
#define PCRE2_ERROR_NOSUBSTRING (-47)
|
||||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
|
||||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
#define PCRE2_ERROR_NULL (-49)
|
||||||
#define PCRE2_ERROR_NULL (-50)
|
#define PCRE2_ERROR_RECURSELOOP (-50)
|
||||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
#define PCRE2_ERROR_RECURSIONLIMIT (-51)
|
||||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
#define PCRE2_ERROR_UNSET (-52)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
@ -257,8 +258,8 @@ must all be greater than zero. */
|
||||||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||||
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
#define PCRE2_CONFIG_UTF 10
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
#define PCRE2_CONFIG_VERSION 11
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
@ -271,12 +272,14 @@ typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
||||||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||||
|
|
||||||
/* Offsets in the pattern (for errors) and in the subject (after a match) are
|
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||||
unsigned 32-bit numbers. We also define a value to indicate "unset" in the
|
including pattern offsets for errors and subject offsets after a match. We
|
||||||
offset vector (ovector). */
|
define special values to indicate zero-terminated strings and unset offsets in
|
||||||
|
the offset vector (ovector). */
|
||||||
|
|
||||||
#define PCRE2_OFFSET PCRE2_UCHAR32
|
#define PCRE2_SIZE size_t
|
||||||
#define PCRE2_UNSET (~(PCRE2_OFFSET)0)
|
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||||
|
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||||
|
|
||||||
/* Generic types for opaque structures and JIT callback functions. These
|
/* Generic types for opaque structures and JIT callback functions. These
|
||||||
declarations are defined in a macro that is expanded for each width later. */
|
declarations are defined in a macro that is expanded for each width later. */
|
||||||
|
@ -311,22 +314,20 @@ versions are generated from this macro below. */
|
||||||
|
|
||||||
#define PCRE2_STRUCTURE_LIST \
|
#define PCRE2_STRUCTURE_LIST \
|
||||||
typedef struct pcre2_callout_block { \
|
typedef struct pcre2_callout_block { \
|
||||||
int version; /* Identifies version of block */ \
|
uint32_t version; /* Identifies version of block */ \
|
||||||
/* ------------------------ Version 0 ------------------------------- */ \
|
/* ------------------------ Version 0 ------------------------------- */ \
|
||||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||||
PCRE2_OFFSET *offset_vector; /* The offset vector */ \
|
|
||||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
|
||||||
size_t subject_length; /* The length of the subject */ \
|
|
||||||
PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \
|
|
||||||
PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \
|
|
||||||
uint32_t capture_top; /* Max current capture */ \
|
uint32_t capture_top; /* Max current capture */ \
|
||||||
uint32_t capture_last; /* Most recently closed capture */ \
|
uint32_t capture_last; /* Most recently closed capture */ \
|
||||||
void *callout_data; /* Data passed in with the call */ \
|
void *callout_data; /* Data passed in with the call */ \
|
||||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||||
PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \
|
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||||
PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \
|
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||||
|
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||||
|
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||||
|
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||||
/* ------------------------------------------------------------------ */ \
|
/* ------------------------------------------------------------------ */ \
|
||||||
} pcre2_callout_block;
|
} pcre2_callout_block;
|
||||||
|
|
||||||
|
@ -336,7 +337,7 @@ expanded for each width below. Start with functions that give general
|
||||||
information. */
|
information. */
|
||||||
|
|
||||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_config(int, void *, size_t);
|
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *);
|
||||||
|
|
||||||
|
|
||||||
/* Functions for manipulating contexts. */
|
/* Functions for manipulating contexts. */
|
||||||
|
@ -346,7 +347,7 @@ PCRE2_EXP_DECL \
|
||||||
pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
|
pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_general_context *pcre2_general_context_create( \
|
pcre2_general_context *pcre2_general_context_create( \
|
||||||
void *(*)(size_t, void *), \
|
void *(*)(PCRE2_SIZE, void *), \
|
||||||
void (*)(void *, void *), void *); \
|
void (*)(void *, void *), void *); \
|
||||||
PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
|
PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
|
||||||
|
|
||||||
|
@ -356,12 +357,10 @@ PCRE2_EXP_DECL \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
|
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||||
uint32_t); \
|
|
||||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||||
const unsigned char *); \
|
const unsigned char *); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
|
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||||
uint32_t); \
|
|
||||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||||
uint32_t); \
|
uint32_t); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
||||||
|
@ -373,18 +372,14 @@ PCRE2_EXP_DECL \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
|
||||||
uint32_t); \
|
|
||||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||||
int (*)(pcre2_callout_block *), void *); \
|
int (*)(pcre2_callout_block *), void *); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||||
uint32_t); \
|
uint32_t); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
|
||||||
uint32_t); \
|
|
||||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||||
uint32_t); \
|
uint32_t); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||||
pcre2_match_context *, void *(*)(size_t, void *), \
|
pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \
|
||||||
void (*)(void *, void *), void *);
|
void (*)(void *, void *), void *);
|
||||||
|
|
||||||
|
|
||||||
|
@ -392,8 +387,8 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||||
|
|
||||||
#define PCRE2_COMPILE_FUNCTIONS \
|
#define PCRE2_COMPILE_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \
|
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||||
int *, PCRE2_OFFSET *, pcre2_compile_context *); \
|
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
|
@ -408,65 +403,62 @@ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
||||||
|
|
||||||
#define PCRE2_MATCH_FUNCTIONS \
|
#define PCRE2_MATCH_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||||
pcre2_general_context *); \
|
pcre2_general_context *); \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||||
pcre2_general_context *); \
|
pcre2_general_context *); \
|
||||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||||
size_t); \
|
PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||||
pcre2_match_data *, pcre2_match_context *); \
|
pcre2_match_data *, pcre2_match_context *); \
|
||||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \
|
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||||
PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \
|
|
||||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *);
|
|
||||||
|
|
||||||
|
|
||||||
/* Convenience functions for handling matched substrings. */
|
/* Convenience functions for handling matched substrings. */
|
||||||
|
|
||||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR *, size_t); \
|
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||||
int, PCRE2_UCHAR *, size_t); \
|
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR **); \
|
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||||
int, PCRE2_UCHAR **); \
|
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR); \
|
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||||
int); \
|
unsigned int, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||||
const pcre2_code *, PCRE2_SPTR); \
|
const pcre2_code *, PCRE2_SPTR); \
|
||||||
PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
|
PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
||||||
PCRE2_UCHAR ***, size_t **);
|
PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||||
|
|
||||||
|
|
||||||
/* Functions for JIT processing */
|
/* Functions for JIT processing */
|
||||||
|
|
||||||
#define PCRE2_JIT_FUNCTIONS \
|
#define PCRE2_JIT_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t, \
|
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||||
pcre2_match_context *); \
|
|
||||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||||
pcre2_match_data *, pcre2_match_context *, \
|
pcre2_match_data *, pcre2_match_context *, \
|
||||||
pcre2_jit_stack *); \
|
pcre2_jit_stack *); \
|
||||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *, \
|
||||||
size_t, size_t); \
|
PCRE2_SIZE, PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
||||||
pcre2_jit_callback, void *); \
|
pcre2_jit_callback, void *); \
|
||||||
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||||
|
@ -475,7 +467,7 @@ PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||||
/* Other miscellaneous functions. */
|
/* Other miscellaneous functions. */
|
||||||
|
|
||||||
#define PCRE2_OTHER_FUNCTIONS \
|
#define PCRE2_OTHER_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, size_t); \
|
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
const uint8_t *pcre2_maketables(pcre2_general_context *); \
|
const uint8_t *pcre2_maketables(pcre2_general_context *); \
|
||||||
|
|
||||||
|
@ -532,17 +524,15 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||||
#define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_)
|
|
||||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||||
#define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_)
|
|
||||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||||
#define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_)
|
|
||||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||||
|
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||||
|
@ -552,14 +542,12 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||||
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
|
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||||
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
|
|
||||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||||
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
|
|
||||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||||
|
@ -621,24 +609,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
#undef PCRE2_OTHER_FUNCTIONS
|
#undef PCRE2_OTHER_FUNCTIONS
|
||||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||||
|
|
||||||
/* Re-define PCRE2_SUFFIX to use the external width value, if defined.
|
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||||
Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce
|
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||||
confusion. */
|
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||||
|
|
||||||
#undef PCRE2_SUFFIX
|
#undef PCRE2_SUFFIX
|
||||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8 && \
|
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||||
PCRE2_CODE_UNIT_WIDTH != 32
|
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32
|
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||||
#endif
|
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||||
|
PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||||
#else
|
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||||
#undef PCRE2_JOIN
|
#undef PCRE2_JOIN
|
||||||
#undef PCRE2_GLUE
|
#undef PCRE2_GLUE
|
||||||
#define PCRE2_SUFFIX(a) a
|
#define PCRE2_SUFFIX(a) a
|
||||||
|
#else
|
||||||
|
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
|
|
|
@ -457,7 +457,7 @@ PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||||
pcre2_jit_stack *); \
|
pcre2_jit_stack *); \
|
||||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \
|
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *, \
|
||||||
PCRE2_SIZE, PCRE2_SIZE); \
|
PCRE2_SIZE, PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
||||||
pcre2_jit_callback, void *); \
|
pcre2_jit_callback, void *); \
|
||||||
|
@ -531,8 +531,8 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||||
#define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_)
|
|
||||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||||
|
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||||
|
|
|
@ -304,8 +304,8 @@ static const short int escapes[] = {
|
||||||
#else
|
#else
|
||||||
|
|
||||||
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
|
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
|
||||||
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
|
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
|
||||||
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
|
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
|
||||||
because it is defined as 'a', which of course picks up the ASCII value. */
|
because it is defined as 'a', which of course picks up the ASCII value. */
|
||||||
|
|
||||||
#if 'a' == 0x81 /* Check for a real EBCDIC environment */
|
#if 'a' == 0x81 /* Check for a real EBCDIC environment */
|
||||||
|
@ -7786,7 +7786,7 @@ if (cb.hwm > cb.start_workspace)
|
||||||
NULL to indicate that forward references have been filled in. */
|
NULL to indicate that forward references have been filled in. */
|
||||||
|
|
||||||
if (cb.workspace_size > COMPILE_WORK_SIZE)
|
if (cb.workspace_size > COMPILE_WORK_SIZE)
|
||||||
ccontext->memctl.free((void *)cb.start_workspace,
|
ccontext->memctl.free((void *)cb.start_workspace,
|
||||||
ccontext->memctl.memory_data);
|
ccontext->memctl.memory_data);
|
||||||
cb.start_workspace = NULL;
|
cb.start_workspace = NULL;
|
||||||
|
|
||||||
|
|
|
@ -221,7 +221,7 @@ static const char match_error_texts[] =
|
||||||
"match limit exceeded\0"
|
"match limit exceeded\0"
|
||||||
"no more memory\0"
|
"no more memory\0"
|
||||||
"unknown or unset substring\0"
|
"unknown or unset substring\0"
|
||||||
"non-unique substring name\0"
|
"non-unique substring name\0"
|
||||||
"NULL argument passed\0"
|
"NULL argument passed\0"
|
||||||
/* 50 */
|
/* 50 */
|
||||||
"nested recursion at the same subject position\0"
|
"nested recursion at the same subject position\0"
|
||||||
|
|
|
@ -97,7 +97,7 @@ sljit_free_unused_memory_exec();
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
||||||
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
|
pcre2_jit_stack_create(pcre2_general_context *gcontext, size_t startsize,
|
||||||
size_t maxsize)
|
size_t maxsize)
|
||||||
{
|
{
|
||||||
#ifndef SUPPORT_JIT
|
#ifndef SUPPORT_JIT
|
||||||
|
|
|
@ -854,7 +854,7 @@ static pcre2_jit_stack_8 *stack8;
|
||||||
static pcre2_jit_stack_8 *getstack8(void)
|
static pcre2_jit_stack_8 *getstack8(void)
|
||||||
{
|
{
|
||||||
if (!stack8)
|
if (!stack8)
|
||||||
stack8 = pcre2_jit_stack_alloc_8(NULL, 1, 1024 * 1024);
|
stack8 = pcre2_jit_stack_create_8(NULL, 1, 1024 * 1024);
|
||||||
return stack8;
|
return stack8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -877,7 +877,7 @@ static pcre2_jit_stack_16 *stack16;
|
||||||
static pcre2_jit_stack_16 *getstack16(void)
|
static pcre2_jit_stack_16 *getstack16(void)
|
||||||
{
|
{
|
||||||
if (!stack16)
|
if (!stack16)
|
||||||
stack16 = pcre2_jit_stack_alloc_16(NULL, 1, 1024 * 1024);
|
stack16 = pcre2_jit_stack_create_16(NULL, 1, 1024 * 1024);
|
||||||
return stack16;
|
return stack16;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -900,7 +900,7 @@ static pcre2_jit_stack_32 *stack32;
|
||||||
static pcre2_jit_stack_32 *getstack32(void)
|
static pcre2_jit_stack_32 *getstack32(void)
|
||||||
{
|
{
|
||||||
if (!stack32)
|
if (!stack32)
|
||||||
stack32 = pcre2_jit_stack_alloc_32(NULL, 1, 1024 * 1024);
|
stack32 = pcre2_jit_stack_create_32(NULL, 1, 1024 * 1024);
|
||||||
return stack32;
|
return stack32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -446,7 +446,7 @@ while (top > bot)
|
||||||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||||
last += entrysize;
|
last += entrysize;
|
||||||
}
|
}
|
||||||
if (firstptr == NULL)
|
if (firstptr == NULL)
|
||||||
return (first == last)? (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
return (first == last)? (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||||
*firstptr = first;
|
*firstptr = first;
|
||||||
*lastptr = last;
|
*lastptr = last;
|
||||||
|
|
|
@ -3115,7 +3115,7 @@ for (fn = pattern_files; fn != NULL; fn = fn->next)
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||||
if (use_jit)
|
if (use_jit)
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 1024*1024);
|
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 1024*1024);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
|
for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
|
||||||
|
|
|
@ -88,7 +88,7 @@ that first, falling back to readline/readline.h. */
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Put the test for interactive input into a macro so that it can be changed if
|
/* Put the test for interactive input into a macro so that it can be changed if
|
||||||
required for different environments. */
|
required for different environments. */
|
||||||
|
|
||||||
#define INTERACTIVE(f) isatty(fileno(f))
|
#define INTERACTIVE(f) isatty(fileno(f))
|
||||||
|
@ -822,13 +822,13 @@ are supported. */
|
||||||
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||||
(pcre2_jit_stack_32 *)i)
|
(pcre2_jit_stack_32 *)i)
|
||||||
|
|
||||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d); \
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
|
||||||
else if (test_mode == PCRE16_MODE) \
|
else if (test_mode == PCRE16_MODE) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d); \
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
|
||||||
else \
|
else \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
||||||
|
|
||||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
@ -1200,11 +1200,11 @@ the three different cases. */
|
||||||
a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
|
a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
|
||||||
G(g,BITTWO),G(h,BITTWO),(G(pcre2_jit_stack_,BITTWO) *)i)
|
G(g,BITTWO),G(h,BITTWO),(G(pcre2_jit_stack_,BITTWO) *)i)
|
||||||
|
|
||||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITONE)(b,c,d); \
|
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
|
||||||
else \
|
else \
|
||||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITTWO)(b,c,d); \
|
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d); \
|
||||||
|
|
||||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
|
@ -1447,8 +1447,8 @@ the three different cases. */
|
||||||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,(pcre2_jit_stack_8 *)i) \
|
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,(pcre2_jit_stack_8 *)i) \
|
||||||
a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||||
(pcre2_jit_stack_8 *)i)
|
(pcre2_jit_stack_8 *)i)
|
||||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
|
||||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||||
pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
|
pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
|
||||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
|
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
|
||||||
|
@ -1526,8 +1526,8 @@ the three different cases. */
|
||||||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
||||||
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
||||||
(pcre2_jit_stack_16 *)i)
|
(pcre2_jit_stack_16 *)i)
|
||||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
|
||||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||||
pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
|
pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
|
||||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
|
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
|
||||||
|
@ -1605,8 +1605,8 @@ the three different cases. */
|
||||||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
||||||
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||||
(pcre2_jit_stack_32 *)i)
|
(pcre2_jit_stack_32 *)i)
|
||||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d);
|
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
||||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||||
pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
|
pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
|
||||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
|
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
|
||||||
|
@ -3681,7 +3681,7 @@ if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
|
||||||
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
|
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
|
||||||
specified. */
|
specified. */
|
||||||
|
|
||||||
if (pat_patctl.jit == 0 &&
|
if (pat_patctl.jit == 0 &&
|
||||||
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
|
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
|
||||||
pat_patctl.jit = 7;
|
pat_patctl.jit = 7;
|
||||||
utf = (pat_patctl.options & PCRE2_UTF) != 0;
|
utf = (pat_patctl.options & PCRE2_UTF) != 0;
|
||||||
|
@ -3996,7 +3996,7 @@ for (;;)
|
||||||
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||||
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options, match_data, dat_context, jit_stack);
|
dat_datctl.options, match_data, dat_context, jit_stack);
|
||||||
else
|
else
|
||||||
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options, match_data, dat_context);
|
dat_datctl.options, match_data, dat_context);
|
||||||
|
|
||||||
|
@ -4637,7 +4637,7 @@ if (dat_datctl.jitstack != 0)
|
||||||
if (dat_datctl.jitstack != jit_stack_size)
|
if (dat_datctl.jitstack != jit_stack_size)
|
||||||
{
|
{
|
||||||
PCRE2_JIT_STACK_FREE(jit_stack);
|
PCRE2_JIT_STACK_FREE(jit_stack);
|
||||||
PCRE2_JIT_STACK_ALLOC(jit_stack, NULL, 1, dat_datctl.jitstack * 1024);
|
PCRE2_JIT_STACK_CREATE(jit_stack, NULL, 1, dat_datctl.jitstack * 1024);
|
||||||
jit_stack_size = dat_datctl.jitstack;
|
jit_stack_size = dat_datctl.jitstack;
|
||||||
}
|
}
|
||||||
PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, jit_stack);
|
PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, jit_stack);
|
||||||
|
@ -4690,10 +4690,10 @@ for (gmatched = 0;; gmatched++)
|
||||||
PCRE2_SIZE ovecsave[2];
|
PCRE2_SIZE ovecsave[2];
|
||||||
|
|
||||||
ovector = FLD(match_data, ovector);
|
ovector = FLD(match_data, ovector);
|
||||||
|
|
||||||
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
||||||
stack callback function. */
|
stack callback function. */
|
||||||
|
|
||||||
jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
|
jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
|
||||||
|
|
||||||
/* After the first time round a global loop, save the current ovector[0,1] so
|
/* After the first time round a global loop, save the current ovector[0,1] so
|
||||||
|
@ -4722,7 +4722,7 @@ for (gmatched = 0;; gmatched++)
|
||||||
}
|
}
|
||||||
if (dfa_workspace == NULL)
|
if (dfa_workspace == NULL)
|
||||||
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
||||||
start_time = clock();
|
start_time = clock();
|
||||||
for (i = 0; i < timeitm; i++)
|
for (i = 0; i < timeitm; i++)
|
||||||
{
|
{
|
||||||
PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
|
PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
|
||||||
|
@ -4730,7 +4730,7 @@ for (gmatched = 0;; gmatched++)
|
||||||
dat_context, dfa_workspace, DFA_WS_DIMENSION);
|
dat_context, dfa_workspace, DFA_WS_DIMENSION);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else if ((pat_patctl.control & CTL_JITFAST) != 0)
|
else if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||||
{
|
{
|
||||||
start_time = clock();
|
start_time = clock();
|
||||||
|
@ -4740,9 +4740,9 @@ for (gmatched = 0;; gmatched++)
|
||||||
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
||||||
dat_context, jit_stack);
|
dat_context, jit_stack);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
start_time = clock();
|
start_time = clock();
|
||||||
for (i = 0; i < timeitm; i++)
|
for (i = 0; i < timeitm; i++)
|
||||||
|
@ -4751,7 +4751,7 @@ for (gmatched = 0;; gmatched++)
|
||||||
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
||||||
dat_context);
|
dat_context);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
total_match_time += (time_taken = clock() - start_time);
|
total_match_time += (time_taken = clock() - start_time);
|
||||||
fprintf(outfile, "Match time %.4f milliseconds\n",
|
fprintf(outfile, "Match time %.4f milliseconds\n",
|
||||||
(((double)time_taken * 1000.0) / (double)timeitm) /
|
(((double)time_taken * 1000.0) / (double)timeitm) /
|
||||||
|
@ -4809,7 +4809,7 @@ for (gmatched = 0;; gmatched++)
|
||||||
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||||
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options | g_notempty, match_data, dat_context, jit_stack);
|
dat_datctl.options | g_notempty, match_data, dat_context, jit_stack);
|
||||||
else
|
else
|
||||||
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||||
dat_datctl.options | g_notempty, match_data, dat_context);
|
dat_datctl.options | g_notempty, match_data, dat_context);
|
||||||
if (capcount == 0)
|
if (capcount == 0)
|
||||||
|
|
|
@ -245,6 +245,11 @@ Minimum match limit = 6
|
||||||
0: aabbccddee (JIT)
|
0: aabbccddee (JIT)
|
||||||
1: aa
|
1: aa
|
||||||
2: cc
|
2: cc
|
||||||
|
3: ee
|
||||||
|
aabbccddee\=jitstack=1
|
||||||
|
0: aabbccddee (JIT)
|
||||||
|
1: aa
|
||||||
|
2: cc
|
||||||
3: ee
|
3: ee
|
||||||
|
|
||||||
/(a+)*zz/
|
/(a+)*zz/
|
||||||
|
|
Loading…
Reference in New Issue