Changed jit_stack_alloc to jit_stack_create.
This commit is contained in:
parent
dea68c01fb
commit
08e3107cbe
|
@ -42,8 +42,8 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_jit_compile.html \
|
||||
doc/html/pcre2_jit_free_unused_memory.html \
|
||||
doc/html/pcre2_jit_match.html \
|
||||
doc/html/pcre2_jit_stack_alloc.html \
|
||||
doc/html/pcre2_jit_stack_assign.html \
|
||||
doc/html/pcre2_jit_stack_create.html \
|
||||
doc/html/pcre2_jit_stack_free.html \
|
||||
doc/html/pcre2_maketables.html \
|
||||
doc/html/pcre2_match.html \
|
||||
|
@ -113,8 +113,8 @@ dist_man_MANS = \
|
|||
doc/pcre2_jit_compile.3 \
|
||||
doc/pcre2_jit_free_unused_memory.3 \
|
||||
doc/pcre2_jit_match.3 \
|
||||
doc/pcre2_jit_stack_alloc.3 \
|
||||
doc/pcre2_jit_stack_assign.3 \
|
||||
doc/pcre2_jit_stack_create.3 \
|
||||
doc/pcre2_jit_stack_free.3 \
|
||||
doc/pcre2_maketables.3 \
|
||||
doc/pcre2_match.3 \
|
||||
|
|
|
@ -82,8 +82,8 @@ checkspecial()
|
|||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# ------ Normal tests ------
|
||||
|
||||
# ------ Normal tests ------
|
||||
|
||||
echo "Testing pcre2grep main features"
|
||||
|
||||
|
@ -585,7 +585,7 @@ $cf $srcdir/testdata/grepoutputN testtrygrep
|
|||
if [ $? != 0 ] ; then exit 1; fi
|
||||
|
||||
|
||||
# Finally, some tests to exercise code that is not tested above, just to be
|
||||
# Finally, some tests to exercise code that is not tested above, just to be
|
||||
# sure that it runs OK. Doing this improves the coverage statistics. The output
|
||||
# is not checked.
|
||||
|
||||
|
|
|
@ -574,7 +574,7 @@ a list of tests.
|
|||
|
||||
The first two tests can always be run, as they expect only plain text strings
|
||||
(not UTF) and make no use of Unicode properties. The first test file can be fed
|
||||
directly into the perltest.pl script to check that Perl gives the same results.
|
||||
directly into the perltest.sh script to check that Perl gives the same results.
|
||||
The only difference you should see is in the first few lines, where the Perl
|
||||
version is given instead of the PCRE2 version. The second set of tests check
|
||||
auxiliary functions, error detection, and run-time flags that are specific to
|
||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
|||
matches any one of them.
|
||||
|
||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||
being compatible with the perltest.pl script, and the fifth checking
|
||||
being compatible with the perltest.sh script, and the fifth checking
|
||||
PCRE2-specific things.
|
||||
|
||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||
|
@ -781,7 +781,6 @@ The distribution should contain the files listed below.
|
|||
doc/html/* HTML documentation
|
||||
doc/pcre2.txt plain text version of the man pages
|
||||
doc/pcre2test.txt plain text documentation of test program
|
||||
doc/perltest.txt plain text documentation of Perl test program
|
||||
install-sh a shell script for installing files
|
||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
|
@ -791,7 +790,7 @@ The distribution should contain the files listed below.
|
|||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
mkinstalldirs script for making install directories
|
||||
perltest.pl Perl test program
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
pcre2_jit_test.c test program for the JIT compiler
|
||||
testdata/testinput* test data for main library tests
|
||||
|
@ -829,4 +828,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 27 October 2014
|
||||
Last updated: 25 October 2014
|
||||
|
|
|
@ -140,12 +140,12 @@ in the library.
|
|||
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
||||
<td> Fast path interface to JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
||||
<td> Assign stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
||||
<td> Free a JIT matching stack</td></tr>
|
||||
|
||||
|
@ -162,7 +162,7 @@ in the library.
|
|||
<tr><td><a href="pcre2_match_context_create.html">pcre2_match_context_create</a></td>
|
||||
<td> Create a match context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_contest_free</a></td>
|
||||
<tr><td><a href="pcre2_match_context_free.html">pcre2_match_context_free</a></td>
|
||||
<td> Free a match context</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_match_data_create.html">pcre2_match_data_create</a></td>
|
||||
|
|
|
@ -27,9 +27,11 @@ DESCRIPTION
|
|||
<P>
|
||||
After a successful call of <b>pcre2_match()</b> that was passed the match block
|
||||
that is this function's argument, this function returns the code unit offset of
|
||||
the character at which the successful match started. This can be different to
|
||||
the value of <i>ovector[0]</i> if the pattern contains the \K escape sequence.
|
||||
Note, however, that \K has no effect for a partial match.
|
||||
the character at which the successful match started. For a non-partial match,
|
||||
this can be different to the value of <i>ovector[0]</i> if the pattern contains
|
||||
the \K escape sequence. After a partial match, however, this value is always
|
||||
the same as <i>ovector[0]</i> because \K does not affect the result of a
|
||||
partial match.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -27,7 +27,10 @@ DESCRIPTION
|
|||
<P>
|
||||
This function frees unused JIT executable memory. The argument is a general
|
||||
context, for custom memory management, or NULL for standard memory management.
|
||||
FIXME: more detail needed.
|
||||
JIT memory allocation retains some memory in order to improve future JIT
|
||||
compilation speed. In low memory conditions,
|
||||
<b>pcre2_jit_free_unused_memory()</b> can be used to cause this memory to be
|
||||
freed.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -36,7 +36,7 @@ Its arguments are exactly the same as for
|
|||
<a href="pcre2_match.html"><b>pcre2_match()</b></a>
|
||||
plus one additional argument that must either point to a JIT stack or be NULL.
|
||||
In the latter case, if a callback function has been set up by
|
||||
<b>pcre2_jit_stack_alloc()</b>, it is called. Otherwise the system stack is
|
||||
<b>pcre2_jit_stack_assign()</b>, it is called. Otherwise the system stack is
|
||||
used.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -42,13 +42,13 @@ block on the machine stack is used.
|
|||
<P>
|
||||
If <i>callback</i> is NULL and <i>callback_data</i> is not NULL,
|
||||
<i>callback_data</i> must be a valid JIT stack, the result of calling
|
||||
<b>pcre2_jit_stack_alloc()</b>.
|
||||
<b>pcre2_jit_stack_create()</b>.
|
||||
</P>
|
||||
<P>
|
||||
If <i>callback</i> is not NULL, it is called with <i>callback_data</i> as an
|
||||
argument at the start of matching, in order to set up a JIT stack. If the
|
||||
result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_alloc()</b>.
|
||||
be a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||
</P>
|
||||
<P>
|
||||
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_jit_stack_alloc specification</title>
|
||||
<title>pcre2_jit_stack_create specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_jit_stack_alloc man page</h1>
|
||||
<h1>pcre2_jit_stack_create man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
|
@ -19,7 +19,7 @@ SYNOPSIS
|
|||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
|
@ -26,7 +26,7 @@ DESCRIPTION
|
|||
</b><br>
|
||||
<P>
|
||||
This function is used to free a JIT stack that was created by
|
||||
<b>pcre2_jit_stack_alloc()</b> when it is no longer needed. For more details,
|
||||
<b>pcre2_jit_stack_create()</b> when it is no longer needed. For more details,
|
||||
see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
page.
|
||||
|
|
|
@ -27,16 +27,17 @@ DESCRIPTION
|
|||
</b><br>
|
||||
<P>
|
||||
This convenience function finds the number of a named substring capturing
|
||||
parenthesis in a compiled pattern. Its arguments are:
|
||||
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||
function arguments are:
|
||||
<pre>
|
||||
<i>code</i> Compiled regular expression
|
||||
<i>name</i> Name whose number is required
|
||||
</pre>
|
||||
The yield of the function is the number of the parenthesis if the name is
|
||||
found, or PCRE2_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
|
||||
(PCRE2_DUPNAMES is set), it is not defined which of the numbers is returned.
|
||||
You can obtain the complete list by calling
|
||||
<b>pcre2_substring_nametable_scan()</b>.
|
||||
found, or PCRE2_ERROR_NOSUBSTRING if it is not found. When duplicate names are
|
||||
allowed (PCRE2_DUPNAMES is set), if the name is not unique,
|
||||
PCRE2_ERROR_NOUNIQUESUBSTRING is returned. You can obtain the list of numbers
|
||||
with the same name by calling <b>pcre2_substring_nametable_scan()</b>.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -236,7 +236,7 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
|
@ -363,7 +363,7 @@ support is not available.
|
|||
</P>
|
||||
<P>
|
||||
More complicated programs might need to make use of the specialist functions
|
||||
<b>pcre2_jit_stack_alloc()</b>, <b>pcre2_jit_stack_free()</b>, and
|
||||
<b>pcre2_jit_stack_create()</b>, <b>pcre2_jit_stack_free()</b>, and
|
||||
<b>pcre2_jit_stack_assign()</b> in order to control the JIT code's memory usage.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1272,7 +1272,7 @@ textual error message from any error code.
|
|||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b>pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *<i>gcontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>startsize</i>, PCRE2_SIZE <i>maxsize</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
|
@ -2050,10 +2050,11 @@ Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
|||
match or a partial match, as well as after a successful one.
|
||||
</P>
|
||||
<P>
|
||||
The offset of the character at which the successful match started is
|
||||
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
|
||||
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
||||
however, that \K has no effect for a partial match.
|
||||
The code unit offset of the character at which a successful match started is
|
||||
returned by <b>pcre2_get_startchar()</b>. For a non-partial match, this can be
|
||||
different to the value of <i>ovector[0]</i> if the pattern contains the \K
|
||||
escape sequence. After a partial match, however, this value is always the same
|
||||
as <i>ovector[0]</i> because \K does not affect the result of a partial match.
|
||||
<a name="errorlist"></a></P>
|
||||
<br><b>
|
||||
Error return values from <b>pcre2_match()</b>
|
||||
|
@ -2302,8 +2303,9 @@ the number of the subpattern called "xxx" is 2. If the name is known to be
|
|||
unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
|
||||
calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
||||
compiled pattern, and the second is the name. The yield of the function is the
|
||||
subpattern number, or PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||
name.
|
||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||
that name.
|
||||
</P>
|
||||
<P>
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
|
@ -2577,7 +2579,7 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC32" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2014
|
||||
Last updated: 03 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -20,17 +20,15 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC5" href="#SEC5">RETURN VALUES FROM JIT MATCHING</a>
|
||||
<li><a name="TOC6" href="#SEC6">CONTROLLING THE JIT STACK</a>
|
||||
<li><a name="TOC7" href="#SEC7">JIT STACK FAQ</a>
|
||||
<li><a name="TOC8" href="#SEC8">EXAMPLE CODE</a>
|
||||
<li><a name="TOC9" href="#SEC9">JIT FAST PATH API</a>
|
||||
<li><a name="TOC10" href="#SEC10">SEE ALSO</a>
|
||||
<li><a name="TOC11" href="#SEC11">AUTHOR</a>
|
||||
<li><a name="TOC12" href="#SEC12">REVISION</a>
|
||||
<li><a name="TOC8" href="#SEC8">FREEING JIT SPECULATIVE MEMORY</a>
|
||||
<li><a name="TOC9" href="#SEC9">EXAMPLE CODE</a>
|
||||
<li><a name="TOC10" href="#SEC10">JIT FAST PATH API</a>
|
||||
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
|
||||
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
|
||||
<li><a name="TOC13" href="#SEC13">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">PCRE2 JUST-IN-TIME COMPILER SUPPORT</a><br>
|
||||
<P>
|
||||
FIXME: This needs checking over once JIT support is implemented.
|
||||
</P>
|
||||
<P>
|
||||
Just-in-time compiling is a heavyweight optimization that can greatly speed up
|
||||
pattern matching. However, it comes at the cost of extra processing before the
|
||||
match is performed. Therefore, it is of most benefit when the same pattern is
|
||||
|
@ -79,9 +77,12 @@ second is a set of option bits, which must include at least one of
|
|||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||
</P>
|
||||
<P>
|
||||
The returned value from <b>pcre2_jit_compile()</b> is zero on success, or a
|
||||
negative error code. In particular, PCRE2_ERROR_JIT_BADOPTION is returned if
|
||||
JIT is not supported or if an unknown options bit is set.
|
||||
If JIT support is not available, a call to <b>pcre2_jit_compile()</b> does
|
||||
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled pattern
|
||||
is passed to the JIT compiler, which turns it into machine code that executes
|
||||
much faster than the normal interpretive code, but yields exactly the same
|
||||
results. The returned value from <b>pcre2_jit_compile()</b> is zero on success,
|
||||
or a negative error code.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for complete
|
||||
|
@ -100,12 +101,6 @@ described in the section entitled
|
|||
below.
|
||||
</P>
|
||||
<P>
|
||||
If JIT support is not available, a call to <b>pcre2_jit_comple()</b> does
|
||||
nothing and returns FIXME. Otherwise, the compiled pattern is passed to the JIT
|
||||
compiler, which turns it into machine code that executes much faster than the
|
||||
normal interpretive code, but yields exactly the same results.
|
||||
</P>
|
||||
<P>
|
||||
There are some <b>pcre2_match()</b> options that are not supported by JIT, and
|
||||
there are also some pattern items that JIT cannot handle. Details are given
|
||||
below. In both cases, matching automatically falls back to the interpretive
|
||||
|
@ -166,7 +161,7 @@ about the use of JIT stacks in the section entitled
|
|||
below.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_jit_stack_alloc()</b> function creates a JIT stack. Its arguments
|
||||
The <b>pcre2_jit_stack_create()</b> function creates a JIT stack. Its arguments
|
||||
are a general context (for memory allocation functions, or NULL for standard
|
||||
memory allocation), a starting size and a maximum size, and it returns a
|
||||
pointer to an opaque structure of type <b>pcre2_jit_stack</b>, or NULL if there
|
||||
|
@ -195,14 +190,14 @@ the other two options:
|
|||
on the machine stack is used.
|
||||
|
||||
(2) If <i>callback</i> is NULL and <i>data</i> is not NULL, <i>data</i> must be
|
||||
a valid JIT stack, the result of calling <b>pcre2_jit_stack_alloc()</b>.
|
||||
a valid JIT stack, the result of calling <b>pcre2_jit_stack_create()</b>.
|
||||
|
||||
(3) If <i>callback</i> is not NULL, it must point to a function that is
|
||||
called with <i>data</i> as an argument at the start of matching, in
|
||||
order to set up a JIT stack. If the return from the callback
|
||||
function is NULL, the internal 32K stack is used; otherwise the
|
||||
return value must be a valid JIT stack, the result of calling
|
||||
<b>pcre2_jit_stack_alloc()</b>.
|
||||
<b>pcre2_jit_stack_create()</b>.
|
||||
</pre>
|
||||
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||
obeyed when <b>pcre2_match()</b> is called with options that are incompatible
|
||||
|
@ -231,7 +226,7 @@ This is a suggestion for how a multithreaded program that needs to set up
|
|||
non-default JIT stacks might operate:
|
||||
<pre>
|
||||
During thread initialization
|
||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
||||
thread_local_var = pcre2_jit_stack_create(...)
|
||||
|
||||
During thread exit
|
||||
pcre2_jit_stack_free(thread_local_var)
|
||||
|
@ -323,7 +318,19 @@ stack handling?
|
|||
No, thanks to Windows. If POSIX threads were used everywhere, we could throw
|
||||
out this complicated API.
|
||||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">EXAMPLE CODE</a><br>
|
||||
<br><a name="SEC8" href="#TOC1">FREEING JIT SPECULATIVE MEMORY</a><br>
|
||||
<P>
|
||||
<b>void pcre2_jit_free_unused_memory(pcre2_general_context *<i>gcontext</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The JIT executable allocator does not free all memory when it is possible to do so.
|
||||
It expects new allocations, and keeps some free memory around to improve
|
||||
allocation speed. However, in low memory conditions, it might be better to free
|
||||
all possible memory. You can cause this to happen by calling
|
||||
pcre2_jit_free_unused_memory(). Its argument is a general context, for custom
|
||||
memory management, or NULL for standard memory management.
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">EXAMPLE CODE</a><br>
|
||||
<P>
|
||||
This is a single-threaded example that specifies a JIT stack without using a
|
||||
callback.
|
||||
|
@ -338,7 +345,7 @@ callback.
|
|||
/* Check for errors */
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
/* Check for errors */
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||
/* Check for error (NULL) */
|
||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
|
@ -349,7 +356,7 @@ callback.
|
|||
|
||||
</PRE>
|
||||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">JIT FAST PATH API</a><br>
|
||||
<br><a name="SEC10" href="#TOC1">JIT FAST PATH API</a><br>
|
||||
<P>
|
||||
Because the API described above falls back to interpreted matching when JIT is
|
||||
not available, it is convenient for programs that are written for general use
|
||||
|
@ -364,11 +371,11 @@ processed by <b>pcre2_jit_compile()</b>).
|
|||
The fast path function is called <b>pcre2_jit_match()</b>, and it takes exactly
|
||||
the same arguments as <b>pcre2_match()</b>, plus one additional argument that
|
||||
must either point to a JIT stack or be NULL. In the latter case, if a callback
|
||||
function has been set up by <b>pcre2_jit_stack_alloc()</b>, it is called.
|
||||
function has been set up by <b>pcre2_jit_stack_assign()</b>, it is called.
|
||||
Otherwise the system stack is used. The return values are the same as for
|
||||
<b>pcre2_match()</b>, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
||||
or complete) is requested that was not compiled. Unsupported option bits are
|
||||
ignored.
|
||||
or complete) is requested that was not compiled. Unsupported option bits (for
|
||||
example, PCRE2_ANCHORED) are ignored.
|
||||
</P>
|
||||
<P>
|
||||
When you call <b>pcre2_match()</b>, as well as testing for invalid options, a
|
||||
|
@ -382,11 +389,11 @@ invalid data is passed, the result is undefined.
|
|||
Bypassing the sanity checks and the <b>pcre2_match()</b> wrapping can give
|
||||
speedups of more than 10%.
|
||||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2api</b>(3)
|
||||
</P>
|
||||
<br><a name="SEC11" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel (FAQ by Zoltan Herczeg)
|
||||
<br>
|
||||
|
@ -395,9 +402,9 @@ University Computing Service
|
|||
Cambridge CB2 3QH, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 21 October 2014
|
||||
Last updated: 03 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -223,10 +223,12 @@ Output the PCRE2 version number and then exit.
|
|||
<br><a name="SEC5" href="#TOC1">DESCRIPTION</a><br>
|
||||
<P>
|
||||
If <b>pcre2test</b> is given two filename arguments, it reads from the first and
|
||||
writes to the second. If it is given only one filename argument, it reads from
|
||||
writes to the second. If the first name is "-", input is taken from the
|
||||
standard input. If <b>pcre2test</b> is given only one argument, it reads from
|
||||
that file and writes to stdout. Otherwise, it reads from stdin and writes to
|
||||
stdout, and prompts for each line of input, using "re>" to prompt for regular
|
||||
expression patterns, and "data>" to prompt for subject lines.
|
||||
stdout. When the input is a terminal, it prompts for each line of input, using
|
||||
"re>" to prompt for regular expression patterns, and "data>" to prompt for
|
||||
subject lines.
|
||||
</P>
|
||||
<P>
|
||||
When <b>pcre2test</b> is built, a configuration option can specify that it
|
||||
|
@ -476,6 +478,7 @@ about the pattern:
|
|||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
|
@ -573,6 +576,13 @@ documentation. See also the <b>jitstack</b> modifier below for a way of
|
|||
setting the size of the JIT stack.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>jitfast</b> modifier is specified, matching is done using the JIT
|
||||
"fast path" interface (<b>pcre2_jit_match()</b>), which skips some of the sanity
|
||||
checks that are done by <b>pcre2_match()</b>, and of course does not work when
|
||||
JIT is not supported. If <b>jitfast</b> is specified without <b>jit</b>, jit=7 is
|
||||
assumed.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>jitverify</b> modifier is specified, information about the compiled
|
||||
pattern shows whether JIT compilation was or was not successful. If
|
||||
<b>jitverify</b> is specified without <b>jit</b>, jit=7 is assumed. If JIT
|
||||
|
@ -612,6 +622,9 @@ Limiting nested parentheses
|
|||
<P>
|
||||
The <b>parens_nest_limit</b> modifier sets a limit on the depth of nested
|
||||
parentheses in a pattern. Breaching the limit causes a compilation error.
|
||||
The default for the library is set when PCRE2 is built, but <b>pcre2test</b>
|
||||
sets its own default of 220, which is required for running the standard test
|
||||
suite.
|
||||
</P>
|
||||
<br><b>
|
||||
Using the POSIX wrapper API
|
||||
|
@ -675,12 +688,13 @@ However, they may be included in a pattern's modifier list, in which case they
|
|||
are applied to every subject line that is processed with that pattern. They do
|
||||
not affect the compilation process.
|
||||
<pre>
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allusedtext show all consulted text
|
||||
/g global global matching
|
||||
mark show mark values
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allusedtext show all consulted text
|
||||
/g global global matching
|
||||
mark show mark values
|
||||
startchar show starting character when relevant
|
||||
</pre>
|
||||
These modifiers may not appear in a <b>#pattern</b> command. If you want them as
|
||||
defaults, set them in a <b>#subject</b> command.
|
||||
|
@ -751,6 +765,7 @@ pattern.
|
|||
offset=<n> set starting offset
|
||||
ovector=<n> set size of output vector
|
||||
recursion_limit=<n> set a recursion limit
|
||||
startchar show startchar when relevant
|
||||
</pre>
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
@ -777,14 +792,30 @@ there is a lookbehind at the start of a match, or a lookahead at the end, or if
|
|||
of the actual match are indicated in the output by '<' or '>' characters
|
||||
underneath them. Here is an example:
|
||||
<pre>
|
||||
/(?<=pqr)abc(?=xyz)/
|
||||
123pqrabcxyz456\=allusedtext
|
||||
re> /(?<=pqr)abc(?=xyz)/
|
||||
data> 123pqrabcxyz456\=allusedtext
|
||||
0: pqrabcxyz
|
||||
<<< >>>
|
||||
</pre>
|
||||
This shows that the matched string is "abc", with the preceding and following
|
||||
strings "pqr" and "xyz" also consulted during the match.
|
||||
</P>
|
||||
<P>
|
||||
The <b>startchar</b> modifier requests that the starting character for the match
|
||||
be indicated, if it is different to the start of the matched string. The only
|
||||
time when this occurs is when \K has been processed as part of the match. In
|
||||
this situation, the output for the matched string is displayed from the
|
||||
starting character instead of from the match point, with circumflex characters
|
||||
under the earlier characters. For example:
|
||||
<pre>
|
||||
re> /abc\Kxyz/
|
||||
data> abcxyz\=startchar
|
||||
0: abcxyz
|
||||
^^^
|
||||
</pre>
|
||||
Unlike <b>allusedtext</b>, the <b>startchar</b> modifier can be used with JIT.
|
||||
However, these two modifiers are mutually exclusive.
|
||||
</P>
|
||||
<br><b>
|
||||
Showing the value of all capture groups
|
||||
</b><br>
|
||||
|
@ -870,8 +901,9 @@ Setting the JIT stack size
|
|||
<P>
|
||||
The <b>jitstack</b> modifier provides a way of setting the maximum stack size
|
||||
that is used by the just-in-time optimization code. It is ignored if JIT
|
||||
optimization is not being used. Providing a stack that is larger than the
|
||||
default 32K is necessary only for very complicated patterns.
|
||||
optimization is not being used. The value is a number of kilobytes. Providing a
|
||||
stack that is larger than the default 32K is necessary only for very
|
||||
complicated patterns.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting match and recursion limits
|
||||
|
@ -939,11 +971,13 @@ appears, though of course it can also be used to set a default in a
|
|||
available for storing matching information. The default is 15.
|
||||
</P>
|
||||
<P>
|
||||
At least one pair of offsets is always created by
|
||||
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
||||
vector.
|
||||
A value of zero is useful when testing the POSIX API because it causes
|
||||
<b>regexec()</b> to be called with a NULL capture vector. When not testing the
|
||||
POSIX API, a value of zero is used to cause
|
||||
<b>pcre2_match_data_create_from_pattern()</b> to be called, in order to create a
|
||||
match block of exactly the right size for the pattern. (It is not possible to
|
||||
create a match block with a zero-length ovector; there is always one pair of
|
||||
offsets.)
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
<P>
|
||||
|
@ -1175,10 +1209,9 @@ characters.
|
|||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2</b>(3), <b>pcre16</b>(3), <b>pcre32</b>(3), <b>pcre2api</b>(3),
|
||||
<b>pcre2callout</b>(3),
|
||||
<b>pcre2</b>(3), <b>pcre2api</b>(3), <b>pcre2callout</b>(3),
|
||||
<b>pcre2jit</b>(3), <b>pcre2matching</b>(3), <b>pcre2partial</b>(3),
|
||||
<b>pcre2pattern</b>(3), <b>pcre2precompile</b>(3).
|
||||
<b>pcre2pattern</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
|
@ -1191,7 +1224,7 @@ Cambridge CB2 3QH, England.
|
|||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 11 October 2014
|
||||
Last updated: 02 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -140,12 +140,12 @@ in the library.
|
|||
<tr><td><a href="pcre2_jit_match.html">pcre2_jit_match</a></td>
|
||||
<td> Fast path interface to JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_alloc.html">pcre2_jit_stack_alloc</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_assign.html">pcre2_jit_stack_assign</a></td>
|
||||
<td> Assign stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_create.html">pcre2_jit_stack_create</a></td>
|
||||
<td> Create a stack for JIT matching</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_jit_stack_free.html">pcre2_jit_stack_free</a></td>
|
||||
<td> Free a JIT matching stack</td></tr>
|
||||
|
||||
|
|
347
doc/pcre2.txt
347
doc/pcre2.txt
|
@ -324,7 +324,7 @@ PCRE2 NATIVE API JIT FUNCTIONS
|
|||
|
||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||
|
||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||
|
||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
||||
|
@ -437,7 +437,7 @@ PCRE2 API OVERVIEW
|
|||
support is not available.
|
||||
|
||||
More complicated programs might need to make use of the specialist
|
||||
functions pcre2_jit_stack_alloc(), pcre2_jit_stack_free(), and
|
||||
functions pcre2_jit_stack_create(), pcre2_jit_stack_free(), and
|
||||
pcre2_jit_stack_assign() in order to control the JIT code's memory
|
||||
usage.
|
||||
|
||||
|
@ -1303,7 +1303,7 @@ JUST-IN-TIME (JIT) COMPILATION
|
|||
|
||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||
|
||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *gcontext,
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *gcontext,
|
||||
PCRE2_SIZE startsize, PCRE2_SIZE maxsize);
|
||||
|
||||
void pcre2_jit_stack_assign(const pcre2_code *code,
|
||||
|
@ -2034,10 +2034,12 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
|
|||
after a failed match or a partial match, as well as after a successful
|
||||
one.
|
||||
|
||||
The offset of the character at which the successful match started is
|
||||
returned by pcre2_get_startchar(). This can be different to the value
|
||||
of ovector[0] if the pattern contains the \K escape sequence. Note,
|
||||
however, that \K has no effect for a partial match.
|
||||
The code unit offset of the character at which a successful match
|
||||
started is returned by pcre2_get_startchar(). For a non-partial match,
|
||||
this can be different to the value of ovector[0] if the pattern con-
|
||||
tains the \K escape sequence. After a partial match, however, this
|
||||
value is always the same as ovector[0] because \K does not affect the
|
||||
result of a partial match.
|
||||
|
||||
Error return values from pcre2_match()
|
||||
|
||||
|
@ -2266,23 +2268,24 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
|
|||
be unique (PCRE2_DUPNAMES was not set), you can find the number from
|
||||
the name by calling pcre2_substring_number_from_name(). The first argu-
|
||||
ment is the compiled pattern, and the second is the name. The yield of
|
||||
the function is the subpattern number, or PCRE2_ERROR_NOSUBSTRING if
|
||||
there is no subpattern of that name.
|
||||
the function is the subpattern number, PCRE2_ERROR_NOSUBSTRING if there
|
||||
is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if
|
||||
there is more than one subpattern of that name.
|
||||
|
||||
Given the number, you can extract the substring directly, or use one of
|
||||
the functions described in the previous section. For convenience, there
|
||||
are also "byname" functions that correspond to the "bynumber" func-
|
||||
tions, the only difference being that the second argument is a name
|
||||
instead of a number. However, if PCRE2_DUPNAMES is set and there are
|
||||
duplicate names, the behaviour may not be what you want (see the next
|
||||
are also "byname" functions that correspond to the "bynumber" func-
|
||||
tions, the only difference being that the second argument is a name
|
||||
instead of a number. However, if PCRE2_DUPNAMES is set and there are
|
||||
duplicate names, the behaviour may not be what you want (see the next
|
||||
section).
|
||||
|
||||
Warning: If the pattern uses the (?| feature to set up multiple subpat-
|
||||
terns with the same number, as described in the section on duplicate
|
||||
subpattern numbers in the pcre2pattern page, you cannot use names to
|
||||
distinguish the different subpatterns, because names are not included
|
||||
in the compiled code. The matching process uses only numbers. For this
|
||||
reason, the use of different names for subpatterns of the same number
|
||||
terns with the same number, as described in the section on duplicate
|
||||
subpattern numbers in the pcre2pattern page, you cannot use names to
|
||||
distinguish the different subpatterns, because names are not included
|
||||
in the compiled code. The matching process uses only numbers. For this
|
||||
reason, the use of different names for subpatterns of the same number
|
||||
causes an error at compile time.
|
||||
|
||||
|
||||
|
@ -2291,54 +2294,54 @@ DUPLICATE SUBPATTERN NAMES
|
|||
int pcre2_substring_nametable_scan(const pcre2_code *code,
|
||||
PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last);
|
||||
|
||||
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
||||
subpatterns are not required to be unique. Duplicate names are always
|
||||
allowed for subpatterns with the same number, created by using the (?|
|
||||
feature. Indeed, if such subpatterns are named, they are required to
|
||||
When a pattern is compiled with the PCRE2_DUPNAMES option, names for
|
||||
subpatterns are not required to be unique. Duplicate names are always
|
||||
allowed for subpatterns with the same number, created by using the (?|
|
||||
feature. Indeed, if such subpatterns are named, they are required to
|
||||
use the same names.
|
||||
|
||||
Normally, patterns with duplicate names are such that in any one match,
|
||||
only one of the named subpatterns participates. An example is shown in
|
||||
only one of the named subpatterns participates. An example is shown in
|
||||
the pcre2pattern documentation.
|
||||
|
||||
When duplicates are present, pcre2_substring_copy_byname() and
|
||||
pcre2_substring_get_byname() return the first substring corresponding
|
||||
When duplicates are present, pcre2_substring_copy_byname() and
|
||||
pcre2_substring_get_byname() return the first substring corresponding
|
||||
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
||||
is returned. The pcre2_substring_number_from_name() function returns
|
||||
one of the numbers that are associated with the name, but it is not
|
||||
is returned. The pcre2_substring_number_from_name() function returns
|
||||
one of the numbers that are associated with the name, but it is not
|
||||
defined which it is.
|
||||
|
||||
If you want to get full details of all captured substrings for a given
|
||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||
first argument is the compiled pattern, and the second is the name. If
|
||||
the third and fourth arguments are NULL, the function returns a group
|
||||
If you want to get full details of all captured substrings for a given
|
||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||
first argument is the compiled pattern, and the second is the name. If
|
||||
the third and fourth arguments are NULL, the function returns a group
|
||||
number (it is not defined which). Otherwise, the third and fourth argu-
|
||||
ments must be pointers to variables that are updated by the function.
|
||||
ments must be pointers to variables that are updated by the function.
|
||||
After it has run, they point to the first and last entries in the name-
|
||||
to-number table for the given name, and the function returns the length
|
||||
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
||||
of each entry. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if
|
||||
there are no entries for the given name.
|
||||
|
||||
The format of the name table is described above in the section entitled
|
||||
Information about a pattern above. Given all the relevant entries for
|
||||
Information about a pattern above. Given all the relevant entries for
|
||||
the name, you can extract each of their numbers, and hence the captured
|
||||
data.
|
||||
|
||||
|
||||
FINDING ALL POSSIBLE MATCHES
|
||||
|
||||
The traditional matching function uses a similar algorithm to Perl,
|
||||
The traditional matching function uses a similar algorithm to Perl,
|
||||
which stops when it finds the first match, starting at a given point in
|
||||
the subject. If you want to find all possible matches, or the longest
|
||||
possible match at a given position, consider using the alternative
|
||||
matching function (see below) instead. If you cannot use the alterna-
|
||||
the subject. If you want to find all possible matches, or the longest
|
||||
possible match at a given position, consider using the alternative
|
||||
matching function (see below) instead. If you cannot use the alterna-
|
||||
tive function, you can kludge it up by making use of the callout facil-
|
||||
ity, which is described in the pcre2callout documentation.
|
||||
|
||||
What you have to do is to insert a callout right at the end of the pat-
|
||||
tern. When your callout function is called, extract and save the cur-
|
||||
rent matched substring. Then return 1, which forces pcre2_match() to
|
||||
backtrack and try other alternatives. Ultimately, when it runs out of
|
||||
tern. When your callout function is called, extract and save the cur-
|
||||
rent matched substring. Then return 1, which forces pcre2_match() to
|
||||
backtrack and try other alternatives. Ultimately, when it runs out of
|
||||
matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH.
|
||||
|
||||
|
||||
|
@ -2350,26 +2353,26 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
pcre2_match_context *mcontext,
|
||||
int *workspace, PCRE2_SIZE wscount);
|
||||
|
||||
The function pcre2_dfa_match() is called to match a subject string
|
||||
against a compiled pattern, using a matching algorithm that scans the
|
||||
subject string just once, and does not backtrack. This has different
|
||||
characteristics to the normal algorithm, and is not compatible with
|
||||
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
||||
theless, there are times when this kind of matching can be useful. For
|
||||
a discussion of the two matching algorithms, and a list of features
|
||||
The function pcre2_dfa_match() is called to match a subject string
|
||||
against a compiled pattern, using a matching algorithm that scans the
|
||||
subject string just once, and does not backtrack. This has different
|
||||
characteristics to the normal algorithm, and is not compatible with
|
||||
Perl. Some of the features of PCRE2 patterns are not supported. Never-
|
||||
theless, there are times when this kind of matching can be useful. For
|
||||
a discussion of the two matching algorithms, and a list of features
|
||||
that pcre2_dfa_match() does not support, see the pcre2matching documen-
|
||||
tation.
|
||||
|
||||
The arguments for the pcre2_dfa_match() function are the same as for
|
||||
The arguments for the pcre2_dfa_match() function are the same as for
|
||||
pcre2_match(), plus two extras. The ovector within the match data block
|
||||
is used in a different way, and this is described below. The other com-
|
||||
mon arguments are used in the same way as for pcre2_match(), so their
|
||||
mon arguments are used in the same way as for pcre2_match(), so their
|
||||
description is not repeated here.
|
||||
|
||||
The two additional arguments provide workspace for the function. The
|
||||
workspace vector should contain at least 20 elements. It is used for
|
||||
The two additional arguments provide workspace for the function. The
|
||||
workspace vector should contain at least 20 elements. It is used for
|
||||
keeping track of multiple paths through the pattern tree. More
|
||||
workspace is needed for patterns and subjects where there are a lot of
|
||||
workspace is needed for patterns and subjects where there are a lot of
|
||||
potential matches.
|
||||
|
||||
Here is an example of a simple call to pcre2_dfa_match():
|
||||
|
@ -2389,45 +2392,45 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
|
||||
Option bits for pcre2_dfa_match()
|
||||
|
||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
The unused bits of the options argument for pcre2_dfa_match() must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED, PCRE2_NOTBOL,
|
||||
PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, PCRE2_PARTIAL_SOFT,
|
||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
||||
these are exactly the same as for pcre2_match(), so their description
|
||||
PCRE2_DFA_SHORTEST, and PCRE2_DFA_RESTART. All but the last four of
|
||||
these are exactly the same as for pcre2_match(), so their description
|
||||
is not repeated here.
|
||||
|
||||
PCRE2_PARTIAL_HARD
|
||||
PCRE2_PARTIAL_SOFT
|
||||
|
||||
These have the same general effect as they do for pcre2_match(), but
|
||||
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
||||
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
||||
These have the same general effect as they do for pcre2_match(), but
|
||||
the details are slightly different. When PCRE2_PARTIAL_HARD is set for
|
||||
pcre2_dfa_match(), it returns PCRE2_ERROR_PARTIAL if the end of the
|
||||
subject is reached and there is still at least one matching possibility
|
||||
that requires additional characters. This happens even if some complete
|
||||
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
||||
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
||||
if the end of the subject is reached, there have been no complete
|
||||
matches have already been found. When PCRE2_PARTIAL_SOFT is set, the
|
||||
return code PCRE2_ERROR_NOMATCH is converted into PCRE2_ERROR_PARTIAL
|
||||
if the end of the subject is reached, there have been no complete
|
||||
matches, but there is still at least one matching possibility. The por-
|
||||
tion of the string that was inspected when the longest partial match
|
||||
tion of the string that was inspected when the longest partial match
|
||||
was found is set as the first matching string in both cases. There is a
|
||||
more detailed discussion of partial and multi-segment matching, with
|
||||
more detailed discussion of partial and multi-segment matching, with
|
||||
examples, in the pcre2partial documentation.
|
||||
|
||||
PCRE2_DFA_SHORTEST
|
||||
|
||||
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
||||
Setting the PCRE2_DFA_SHORTEST option causes the matching algorithm to
|
||||
stop as soon as it has found one match. Because of the way the alterna-
|
||||
tive algorithm works, this is necessarily the shortest possible match
|
||||
tive algorithm works, this is necessarily the shortest possible match
|
||||
at the first possible matching point in the subject string.
|
||||
|
||||
PCRE2_DFA_RESTART
|
||||
|
||||
When pcre2_dfa_match() returns a partial match, it is possible to call
|
||||
When pcre2_dfa_match() returns a partial match, it is possible to call
|
||||
it again, with additional subject characters, and have it continue with
|
||||
the same match. The PCRE2_DFA_RESTART option requests this action; when
|
||||
it is set, the workspace and wscount options must reference the same
|
||||
vector as before because data about the match so far is left in them
|
||||
it is set, the workspace and wscount options must reference the same
|
||||
vector as before because data about the match so far is left in them
|
||||
after a partial match. There is more discussion of this facility in the
|
||||
pcre2partial documentation.
|
||||
|
||||
|
@ -2435,8 +2438,8 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
|
||||
When pcre2_dfa_match() succeeds, it may have matched more than one sub-
|
||||
string in the subject. Note, however, that all the matches from one run
|
||||
of the function start at the same point in the subject. The shorter
|
||||
matches are all initial substrings of the longer matches. For example,
|
||||
of the function start at the same point in the subject. The shorter
|
||||
matches are all initial substrings of the longer matches. For example,
|
||||
if the pattern
|
||||
|
||||
<.*>
|
||||
|
@ -2451,66 +2454,66 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
<something> <something else>
|
||||
<something> <something else> <something further>
|
||||
|
||||
On success, the yield of the function is a number greater than zero,
|
||||
which is the number of matched substrings. The offsets of the sub-
|
||||
strings are returned in the ovector, and can be extracted in the same
|
||||
way as for pcre2_match(). They are returned in reverse order of
|
||||
length; that is, the longest matching string is given first. If there
|
||||
were too many matches to fit into the ovector, the yield of the func-
|
||||
On success, the yield of the function is a number greater than zero,
|
||||
which is the number of matched substrings. The offsets of the sub-
|
||||
strings are returned in the ovector, and can be extracted in the same
|
||||
way as for pcre2_match(). They are returned in reverse order of
|
||||
length; that is, the longest matching string is given first. If there
|
||||
were too many matches to fit into the ovector, the yield of the func-
|
||||
tion is zero, and the vector is filled with the longest matches.
|
||||
|
||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||
character repeats at the end of a pattern (as well as internally). For
|
||||
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
||||
there is no point in backtracking into the repeated digits. For DFA
|
||||
matching, this means that only one possible match is found. If you
|
||||
really do want multiple matches in such cases, either use an ungreedy
|
||||
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||
character repeats at the end of a pattern (as well as internally). For
|
||||
example, the pattern "a\d+" is compiled as if it were "a\d++" because
|
||||
there is no point in backtracking into the repeated digits. For DFA
|
||||
matching, this means that only one possible match is found. If you
|
||||
really do want multiple matches in such cases, either use an ungreedy
|
||||
repeat ("a\d+?") or set the PCRE2_NO_AUTO_POSSESS option when compil-
|
||||
ing.
|
||||
|
||||
Error returns from pcre2_dfa_match()
|
||||
|
||||
The pcre2_dfa_match() function returns a negative number when it fails.
|
||||
Many of the errors are the same as for pcre2_match(), as described
|
||||
Many of the errors are the same as for pcre2_match(), as described
|
||||
above. There are in addition the following errors that are specific to
|
||||
pcre2_dfa_match():
|
||||
|
||||
PCRE2_ERROR_DFA_UITEM
|
||||
|
||||
This return is given if pcre2_dfa_match() encounters an item in the
|
||||
This return is given if pcre2_dfa_match() encounters an item in the
|
||||
pattern that it does not support, for instance, the use of \C or a back
|
||||
reference.
|
||||
|
||||
PCRE2_ERROR_DFA_UCOND
|
||||
|
||||
This return is given if pcre2_dfa_match() encounters a condition item
|
||||
that uses a back reference for the condition, or a test for recursion
|
||||
This return is given if pcre2_dfa_match() encounters a condition item
|
||||
that uses a back reference for the condition, or a test for recursion
|
||||
in a specific group. These are not supported.
|
||||
|
||||
PCRE2_ERROR_DFA_WSSIZE
|
||||
|
||||
This return is given if pcre2_dfa_match() runs out of space in the
|
||||
This return is given if pcre2_dfa_match() runs out of space in the
|
||||
workspace vector.
|
||||
|
||||
PCRE2_ERROR_DFA_RECURSE
|
||||
|
||||
When a recursive subpattern is processed, the matching function calls
|
||||
When a recursive subpattern is processed, the matching function calls
|
||||
itself recursively, using private memory for the ovector and workspace.
|
||||
This error is given if the internal ovector is not large enough. This
|
||||
This error is given if the internal ovector is not large enough. This
|
||||
should be extremely rare, as a vector of size 1000 is used.
|
||||
|
||||
PCRE2_ERROR_DFA_BADRESTART
|
||||
|
||||
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
||||
some plausibility checks are made on the contents of the workspace,
|
||||
which should contain data about the previous partial match. If any of
|
||||
When pcre2_dfa_match() is called with the PCRE2_DFA_RESTART option,
|
||||
some plausibility checks are made on the contents of the workspace,
|
||||
which should contain data about the previous partial match. If any of
|
||||
these checks fail, this error is given.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
|
||||
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
||||
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
||||
pcre2build(3), pcre2libs(3), pcre2callout(3), pcre2matching(3),
|
||||
pcre2partial(3), pcre2posix(3), pcre2demo(3), pcre2sample(3),
|
||||
pcre2stack(3).
|
||||
|
||||
|
||||
|
@ -2523,7 +2526,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 16 October 2014
|
||||
Last updated: 03 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -3411,8 +3414,6 @@ NAME
|
|||
|
||||
PCRE2 JUST-IN-TIME COMPILER SUPPORT
|
||||
|
||||
FIXME: This needs checking over once JIT support is implemented.
|
||||
|
||||
Just-in-time compiling is a heavyweight optimization that can greatly
|
||||
speed up pattern matching. However, it comes at the cost of extra pro-
|
||||
cessing before the match is performed. Therefore, it is of most benefit
|
||||
|
@ -3462,100 +3463,97 @@ SIMPLE USE OF JIT
|
|||
second is a set of option bits, which must include at least one of
|
||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||
|
||||
The returned value from pcre2_jit_compile() is zero on success, or a
|
||||
negative error code. In particular, PCRE2_ERROR_JIT_BADOPTION is
|
||||
returned if JIT is not supported or if an unknown options bit is set.
|
||||
If JIT support is not available, a call to pcre2_jit_compile() does
|
||||
nothing and returns PCRE2_ERROR_JIT_BADOPTION. Otherwise, the compiled
|
||||
pattern is passed to the JIT compiler, which turns it into machine code
|
||||
that executes much faster than the normal interpretive code, but yields
|
||||
exactly the same results. The returned value from pcre2_jit_compile()
|
||||
is zero on success, or a negative error code.
|
||||
|
||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com-
|
||||
plete matches. If you want to run partial matches using the PCRE2_PAR-
|
||||
TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should
|
||||
set one or both of the other options as well as, or instead of
|
||||
PCRE2_JIT_COMPLETE requests the JIT compiler to generate code for com-
|
||||
plete matches. If you want to run partial matches using the PCRE2_PAR-
|
||||
TIAL_HARD or PCRE2_PARTIAL_SOFT options of pcre2_match(), you should
|
||||
set one or both of the other options as well as, or instead of
|
||||
PCRE2_JIT_COMPLETE. The JIT compiler generates different optimized code
|
||||
for each of the three modes (normal, soft partial, hard partial). When
|
||||
pcre2_match() is called, the appropriate code is run if it is avail-
|
||||
for each of the three modes (normal, soft partial, hard partial). When
|
||||
pcre2_match() is called, the appropriate code is run if it is avail-
|
||||
able. Otherwise, the pattern is matched using interpretive code.
|
||||
|
||||
In some circumstances you may need to call additional functions. These
|
||||
are described in the section entitled "Controlling the JIT stack"
|
||||
In some circumstances you may need to call additional functions. These
|
||||
are described in the section entitled "Controlling the JIT stack"
|
||||
below.
|
||||
|
||||
If JIT support is not available, a call to pcre2_jit_compile() does
|
||||
nothing and returns FIXME. Otherwise, the compiled pattern is passed to
|
||||
the JIT compiler, which turns it into machine code that executes much
|
||||
faster than the normal interpretive code, but yields exactly the same
|
||||
results.
|
||||
|
||||
There are some pcre2_match() options that are not supported by JIT, and
|
||||
there are also some pattern items that JIT cannot handle. Details are
|
||||
given below. In both cases, matching automatically falls back to the
|
||||
interpretive code. If you want to know whether JIT was actually used
|
||||
for a particular match, you should arrange for a JIT callback function
|
||||
to be set up as described in the section entitled "Controlling the JIT
|
||||
stack" below, even if you do not need to supply a non-default JIT
|
||||
there are also some pattern items that JIT cannot handle. Details are
|
||||
given below. In both cases, matching automatically falls back to the
|
||||
interpretive code. If you want to know whether JIT was actually used
|
||||
for a particular match, you should arrange for a JIT callback function
|
||||
to be set up as described in the section entitled "Controlling the JIT
|
||||
stack" below, even if you do not need to supply a non-default JIT
|
||||
stack. Such a callback function is called whenever JIT code is about to
|
||||
be obeyed. If the match-time options are not right for JIT execution,
|
||||
be obeyed. If the match-time options are not right for JIT execution,
|
||||
the callback function is not obeyed.
|
||||
|
||||
If the JIT compiler finds an unsupported item, no JIT data is gener-
|
||||
ated. You can find out if JIT matching is available after compiling a
|
||||
If the JIT compiler finds an unsupported item, no JIT data is gener-
|
||||
ated. You can find out if JIT matching is available after compiling a
|
||||
pattern by calling pcre2_pattern_info() with the PCRE2_INFO_JIT option.
|
||||
A result of 1 means that JIT compilation was successful. A result of 0
|
||||
means that JIT support is not available, or the pattern was not pro-
|
||||
A result of 1 means that JIT compilation was successful. A result of 0
|
||||
means that JIT support is not available, or the pattern was not pro-
|
||||
cessed by pcre2_jit_compile(), or the JIT compiler was not able to han-
|
||||
dle the pattern.
|
||||
|
||||
|
||||
UNSUPPORTED OPTIONS AND PATTERN ITEMS
|
||||
|
||||
The pcre2_match() options that are supported for JIT matching are
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
The pcre2_match() options that are supported for JIT matching are
|
||||
PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART,
|
||||
PCRE2_NO_UTF_CHECK, PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. The
|
||||
PCRE2_ANCHORED option is not supported at match time.
|
||||
|
||||
The only unsupported pattern items are \C (match a single data unit)
|
||||
when running in a UTF mode, and a callout immediately before an asser-
|
||||
The only unsupported pattern items are \C (match a single data unit)
|
||||
when running in a UTF mode, and a callout immediately before an asser-
|
||||
tion condition in a conditional group.
|
||||
|
||||
|
||||
RETURN VALUES FROM JIT MATCHING
|
||||
|
||||
When a pattern is matched using JIT matching, the return values are the
|
||||
same as those given by the interpretive pcre2_match() code, with the
|
||||
addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
|
||||
that the memory used for the JIT stack was insufficient. See "Control-
|
||||
same as those given by the interpretive pcre2_match() code, with the
|
||||
addition of one new error code: PCRE2_ERROR_JIT_STACKLIMIT. This means
|
||||
that the memory used for the JIT stack was insufficient. See "Control-
|
||||
ling the JIT stack" below for a discussion of JIT stack usage.
|
||||
|
||||
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
|
||||
searching a very large pattern tree goes on for too long, as it is in
|
||||
the same circumstance when JIT is not used, but the details of exactly
|
||||
what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error
|
||||
The error code PCRE2_ERROR_MATCHLIMIT is returned by the JIT code if
|
||||
searching a very large pattern tree goes on for too long, as it is in
|
||||
the same circumstance when JIT is not used, but the details of exactly
|
||||
what is counted are not the same. The PCRE2_ERROR_RECURSIONLIMIT error
|
||||
code is never returned when JIT matching is used.
|
||||
|
||||
|
||||
CONTROLLING THE JIT STACK
|
||||
|
||||
When the compiled JIT code runs, it needs a block of memory to use as a
|
||||
stack. By default, it uses 32K on the machine stack. However, some
|
||||
large or complicated patterns need more than this. The error
|
||||
PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
|
||||
Three functions are provided for managing blocks of memory for use as
|
||||
JIT stacks. There is further discussion about the use of JIT stacks in
|
||||
stack. By default, it uses 32K on the machine stack. However, some
|
||||
large or complicated patterns need more than this. The error
|
||||
PCRE2_ERROR_JIT_STACKLIMIT is given when there is not enough stack.
|
||||
Three functions are provided for managing blocks of memory for use as
|
||||
JIT stacks. There is further discussion about the use of JIT stacks in
|
||||
the section entitled "JIT stack FAQ" below.
|
||||
|
||||
The pcre2_jit_stack_alloc() function creates a JIT stack. Its arguments
|
||||
are a general context (for memory allocation functions, or NULL for
|
||||
standard memory allocation), a starting size and a maximum size, and it
|
||||
returns a pointer to an opaque structure of type pcre2_jit_stack, or
|
||||
NULL if there is an error. The pcre2_jit_stack_free() function is used
|
||||
to free a stack that is no longer needed. (For the technically minded:
|
||||
the address space is allocated by mmap or VirtualAlloc.) FIXME Is this
|
||||
right?
|
||||
The pcre2_jit_stack_create() function creates a JIT stack. Its argu-
|
||||
ments are a general context (for memory allocation functions, or NULL
|
||||
for standard memory allocation), a starting size and a maximum size,
|
||||
and it returns a pointer to an opaque structure of type
|
||||
pcre2_jit_stack, or NULL if there is an error. The
|
||||
pcre2_jit_stack_free() function is used to free a stack that is no
|
||||
longer needed. (For the technically minded: the address space is allo-
|
||||
cated by mmap or VirtualAlloc.) FIXME Is this right?
|
||||
|
||||
JIT uses far less memory for recursion than the interpretive code, and
|
||||
a maximum stack size of 512K to 1M should be more than enough for any
|
||||
JIT uses far less memory for recursion than the interpretive code, and
|
||||
a maximum stack size of 512K to 1M should be more than enough for any
|
||||
pattern.
|
||||
|
||||
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
||||
The pcre2_jit_stack_assign() function specifies which stack JIT code
|
||||
should use. Its arguments are as follows:
|
||||
|
||||
pcre2_code *code
|
||||
|
@ -3563,21 +3561,22 @@ CONTROLLING THE JIT STACK
|
|||
void *data
|
||||
|
||||
The code argument is a pointer to a compiled pattern, after it has been
|
||||
processed by pcre2_jit_compile(). There are three cases for the values
|
||||
processed by pcre2_jit_compile(). There are three cases for the values
|
||||
of the other two options:
|
||||
|
||||
(1) If callback is NULL and data is NULL, an internal 32K block
|
||||
on the machine stack is used.
|
||||
|
||||
(2) If callback is NULL and data is not NULL, data must be
|
||||
a valid JIT stack, the result of calling pcre2_jit_stack_alloc().
|
||||
a valid JIT stack, the result of calling pcre2_jit_stack_cre-
|
||||
ate().
|
||||
|
||||
(3) If callback is not NULL, it must point to a function that is
|
||||
called with data as an argument at the start of matching, in
|
||||
order to set up a JIT stack. If the return from the callback
|
||||
function is NULL, the internal 32K stack is used; otherwise the
|
||||
return value must be a valid JIT stack, the result of calling
|
||||
pcre2_jit_stack_alloc().
|
||||
pcre2_jit_stack_create().
|
||||
|
||||
A callback function is obeyed whenever JIT code is about to be run; it
|
||||
is not obeyed when pcre2_match() is called with options that are incom-
|
||||
|
@ -3605,7 +3604,7 @@ CONTROLLING THE JIT STACK
|
|||
up non-default JIT stacks might operate:
|
||||
|
||||
During thread initialization
|
||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
||||
thread_local_var = pcre2_jit_stack_create(...)
|
||||
|
||||
During thread exit
|
||||
pcre2_jit_stack_free(thread_local_var)
|
||||
|
@ -3687,6 +3686,19 @@ JIT STACK FAQ
|
|||
throw out this complicated API.
|
||||
|
||||
|
||||
FREEING JIT SPECULATIVE MEMORY
|
||||
|
||||
void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);
|
||||
|
||||
The JIT executable allocator does not free all memory when it is possi-
|
||||
ble. It expects new allocations, and keeps some free memory around to
|
||||
improve allocation speed. However, in low memory conditions, it might
|
||||
be better to free all possible memory. You can cause this to happen by
|
||||
calling pcre2_jit_free_unused_memory(). Its argument is a general con-
|
||||
text, for custom memory management, or NULL for standard memory manage-
|
||||
ment.
|
||||
|
||||
|
||||
EXAMPLE CODE
|
||||
|
||||
This is a single-threaded example that specifies a JIT stack without
|
||||
|
@ -3702,7 +3714,7 @@ EXAMPLE CODE
|
|||
/* Check for errors */
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
/* Check for errors */
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||
/* Check for error (NULL) */
|
||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
|
@ -3727,19 +3739,20 @@ JIT FAST PATH API
|
|||
exactly the same arguments as pcre2_match(), plus one additional argu-
|
||||
ment that must either point to a JIT stack or be NULL. In the latter
|
||||
case, if a callback function has been set up by
|
||||
pcre2_jit_stack_alloc(), it is called. Otherwise the system stack is
|
||||
pcre2_jit_stack_assign(), it is called. Otherwise the system stack is
|
||||
used. The return values are the same as for pcre2_match(), plus
|
||||
PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial or complete) is
|
||||
requested that was not compiled. Unsupported option bits are ignored.
|
||||
requested that was not compiled. Unsupported option bits (for example,
|
||||
PCRE2_ANCHORED) are ignored.
|
||||
|
||||
When you call pcre2_match(), as well as testing for invalid options, a
|
||||
When you call pcre2_match(), as well as testing for invalid options, a
|
||||
number of other sanity checks are performed on the arguments. For exam-
|
||||
ple, if the subject pointer is NULL, an immediate error is given. Also,
|
||||
unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
|
||||
validity. In the interests of speed, these checks do not happen on the
|
||||
unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for
|
||||
validity. In the interests of speed, these checks do not happen on the
|
||||
JIT fast path, and if invalid data is passed, the result is undefined.
|
||||
|
||||
Bypassing the sanity checks and the pcre2_match() wrapping can give
|
||||
Bypassing the sanity checks and the pcre2_match() wrapping can give
|
||||
speedups of more than 10%.
|
||||
|
||||
|
||||
|
@ -3757,7 +3770,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 21 October 2014
|
||||
Last updated: 03 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -15,9 +15,9 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.sp
|
||||
This function frees unused JIT executable memory. The argument is a general
|
||||
context, for custom memory management, or NULL for standard memory management.
|
||||
JIT memory allocation retains some memory in order to improve future JIT
|
||||
compilation speed. In low memory conditions,
|
||||
\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
|
||||
JIT memory allocation retains some memory in order to improve future JIT
|
||||
compilation speed. In low memory conditions,
|
||||
\fBpcre2_jit_free_unused_memory()\fP can be used to cause this memory to be
|
||||
freed.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_JIT_MATCH 3 "21 October 2014" "PCRE2 10.0"
|
||||
.TH PCRE2_JIT_MATCH 3 "03 November 2014" "PCRE2 10.0"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -26,7 +26,7 @@ Its arguments are exactly the same as for
|
|||
.\"
|
||||
plus one additional argument that must either point to a JIT stack or be NULL.
|
||||
In the latter case, if a callback function has been set up by
|
||||
\fBpcre2_jit_stack_alloc()\fP, it is called. Otherwise the system stack is
|
||||
\fBpcre2_jit_stack_assign()\fP, it is called. Otherwise the system stack is
|
||||
used.
|
||||
.P
|
||||
The supported options are PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_JIT_STACK_ASSIGN 3 "21 October 2014" "PCRE2 10.0"
|
||||
.TH PCRE2_JIT_STACK_ASSIGN 3 "03 November 2014" "PCRE2 10.0"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -27,12 +27,12 @@ block on the machine stack is used.
|
|||
.P
|
||||
If \fIcallback\fP is NULL and \fIcallback_data\fP is not NULL,
|
||||
\fIcallback_data\fP must be a valid JIT stack, the result of calling
|
||||
\fBpcre2_jit_stack_alloc()\fP.
|
||||
\fBpcre2_jit_stack_create()\fP.
|
||||
.P
|
||||
If \fIcallback\fP is not NULL, it is called with \fIcallback_data\fP as an
|
||||
argument at the start of matching, in order to set up a JIT stack. If the
|
||||
result is NULL, the internal 32K stack is used; otherwise the return value must
|
||||
be a valid JIT stack, the result of calling \fBpcre2_jit_stack_alloc()\fP.
|
||||
be a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP.
|
||||
.P
|
||||
You may safely assign the same JIT stack to multiple patterns, as long as they
|
||||
are all matched in the same thread. In a multithread application, each thread
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_JIT_STACK_ALLOC 3 "21 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_JIT_STACK_CREATE 3 "03 November 2014" "PCRE2 10.00"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -7,7 +7,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||
.fi
|
||||
.
|
|
@ -13,7 +13,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.rs
|
||||
.sp
|
||||
This function is used to free a JIT stack that was created by
|
||||
\fBpcre2_jit_stack_alloc()\fP when it is no longer needed. For more details,
|
||||
\fBpcre2_jit_stack_create()\fP when it is no longer needed. For more details,
|
||||
see the
|
||||
.\" HREF
|
||||
\fBpcre2jit\fP
|
||||
|
|
|
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
.rs
|
||||
.sp
|
||||
This convenience function finds the number of a named substring capturing
|
||||
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||
parenthesis in a compiled pattern, provided that it is a unique name. The
|
||||
function arguments are:
|
||||
.sp
|
||||
\fIcode\fP Compiled regular expression
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "29 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2API 3 "03 November 2014" "PCRE2 10.00"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -182,7 +182,7 @@ document for an overview of all the PCRE2 documentation.
|
|||
.sp
|
||||
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
||||
.sp
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||
.sp
|
||||
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
||||
|
@ -308,7 +308,7 @@ successfully compiled by \fBpcre2_compile()\fP. This does nothing if JIT
|
|||
support is not available.
|
||||
.P
|
||||
More complicated programs might need to make use of the specialist functions
|
||||
\fBpcre2_jit_stack_alloc()\fP, \fBpcre2_jit_stack_free()\fP, and
|
||||
\fBpcre2_jit_stack_create()\fP, \fBpcre2_jit_stack_free()\fP, and
|
||||
\fBpcre2_jit_stack_assign()\fP in order to control the JIT code's memory usage.
|
||||
.P
|
||||
JIT matching is automatically used by \fBpcre2_match()\fP if it is available.
|
||||
|
@ -1265,7 +1265,7 @@ textual error message from any error code.
|
|||
.sp
|
||||
.B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP);
|
||||
.sp
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *\fIgcontext\fP,
|
||||
.B pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *\fIgcontext\fP,
|
||||
.B " PCRE2_SIZE \fIstartsize\fP, PCRE2_SIZE \fImaxsize\fP);"
|
||||
.sp
|
||||
.B void pcre2_jit_stack_assign(const pcre2_code *\fIcode\fP,
|
||||
|
@ -2072,7 +2072,7 @@ match or a partial match, as well as after a successful one.
|
|||
The code unit offset of the character at which a successful match started is
|
||||
returned by \fBpcre2_get_startchar()\fP. For a non-partial match, this can be
|
||||
different to the value of \fIovector[0]\fP if the pattern contains the \eK
|
||||
escape sequence. After a partial match, however, this value is always the same
|
||||
escape sequence. After a partial match, however, this value is always the same
|
||||
as \fIovector[0]\fP because \eK does not affect the result of a partial match.
|
||||
.
|
||||
.
|
||||
|
@ -2333,7 +2333,7 @@ unique (PCRE2_DUPNAMES was not set), you can find the number from the name by
|
|||
calling \fBpcre2_substring_number_from_name()\fP. The first argument is the
|
||||
compiled pattern, and the second is the name. The yield of the function is the
|
||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||
that name.
|
||||
.P
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
|
@ -2631,6 +2631,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 29 October 2014
|
||||
Last updated: 03 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2JIT 3 "02 November 2014" "PCRE2 10.00"
|
||||
.TH PCRE2JIT 3 "03 November 2014" "PCRE2 10.00"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT"
|
||||
|
@ -149,7 +149,7 @@ about the use of JIT stacks in the section entitled
|
|||
.\"
|
||||
below.
|
||||
.P
|
||||
The \fBpcre2_jit_stack_alloc()\fP function creates a JIT stack. Its arguments
|
||||
The \fBpcre2_jit_stack_create()\fP function creates a JIT stack. Its arguments
|
||||
are a general context (for memory allocation functions, or NULL for standard
|
||||
memory allocation), a starting size and a maximum size, and it returns a
|
||||
pointer to an opaque structure of type \fBpcre2_jit_stack\fP, or NULL if there
|
||||
|
@ -176,14 +176,14 @@ the other two options:
|
|||
on the machine stack is used.
|
||||
.sp
|
||||
(2) If \fIcallback\fP is NULL and \fIdata\fP is not NULL, \fIdata\fP must be
|
||||
a valid JIT stack, the result of calling \fBpcre2_jit_stack_alloc()\fP.
|
||||
a valid JIT stack, the result of calling \fBpcre2_jit_stack_create()\fP.
|
||||
.sp
|
||||
(3) If \fIcallback\fP is not NULL, it must point to a function that is
|
||||
called with \fIdata\fP as an argument at the start of matching, in
|
||||
order to set up a JIT stack. If the return from the callback
|
||||
function is NULL, the internal 32K stack is used; otherwise the
|
||||
return value must be a valid JIT stack, the result of calling
|
||||
\fBpcre2_jit_stack_alloc()\fP.
|
||||
\fBpcre2_jit_stack_create()\fP.
|
||||
.sp
|
||||
A callback function is obeyed whenever JIT code is about to be run; it is not
|
||||
obeyed when \fBpcre2_match()\fP is called with options that are incompatible
|
||||
|
@ -209,7 +209,7 @@ This is a suggestion for how a multithreaded program that needs to set up
|
|||
non-default JIT stacks might operate:
|
||||
.sp
|
||||
During thread initialization
|
||||
thread_local_var = pcre2_jit_stack_alloc(...)
|
||||
thread_local_var = pcre2_jit_stack_create(...)
|
||||
.sp
|
||||
During thread exit
|
||||
pcre2_jit_stack_free(thread_local_var)
|
||||
|
@ -323,7 +323,7 @@ callback.
|
|||
/* Check for errors */
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
/* Check for errors */
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 512*1024);
|
||||
/* Check for error (NULL) */
|
||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
|
@ -352,7 +352,7 @@ must either point to a JIT stack or be NULL. In the latter case, if a callback
|
|||
function has been set up by \fBpcre2_jit_stack_assign()\fP, it is called.
|
||||
Otherwise the system stack is used. The return values are the same as for
|
||||
\fBpcre2_match()\fP, plus PCRE2_ERROR_JIT_BADOPTION if a matching mode (partial
|
||||
or complete) is requested that was not compiled. Unsupported option bits (for
|
||||
or complete) is requested that was not compiled. Unsupported option bits (for
|
||||
example, PCRE2_ANCHORED) are ignored.
|
||||
.P
|
||||
When you call \fBpcre2_match()\fP, as well as testing for invalid options, a
|
||||
|
@ -386,6 +386,6 @@ Cambridge CB2 3QH, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 02 November 2014
|
||||
Last updated: 03 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -437,7 +437,7 @@ about the pattern:
|
|||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
jitfast use JIT fast path
|
||||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
|
@ -577,8 +577,8 @@ also output.
|
|||
.sp
|
||||
The \fBparens_nest_limit\fP modifier sets a limit on the depth of nested
|
||||
parentheses in a pattern. Breaching the limit causes a compilation error.
|
||||
The default for the library is set when PCRE2 is built, but \fBpcre2test\fP
|
||||
sets its own default of 220, which is required for running the standard test
|
||||
The default for the library is set when PCRE2 is built, but \fBpcre2test\fP
|
||||
sets its own default of 220, which is required for running the standard test
|
||||
suite.
|
||||
.
|
||||
.
|
||||
|
@ -651,7 +651,7 @@ not affect the compilation process.
|
|||
allusedtext show all consulted text
|
||||
/g global global matching
|
||||
mark show mark values
|
||||
startchar show starting character when relevant
|
||||
startchar show starting character when relevant
|
||||
.sp
|
||||
These modifiers may not appear in a \fB#pattern\fP command. If you want them as
|
||||
defaults, set them in a \fB#subject\fP command.
|
||||
|
@ -725,7 +725,7 @@ pattern.
|
|||
offset=<n> set starting offset
|
||||
ovector=<n> set size of output vector
|
||||
recursion_limit=<n> set a recursion limit
|
||||
startchar show startchar when relevant
|
||||
startchar show startchar when relevant
|
||||
.sp
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
@ -759,17 +759,17 @@ underneath them. Here is an example:
|
|||
This shows that the matched string is "abc", with the preceding and following
|
||||
strings "pqr" and "xyz" also consulted during the match.
|
||||
.P
|
||||
The \fBstartchar\fP modifier requests that the starting character for the match
|
||||
be indicated, if it is different to the start of the matched string. The only
|
||||
time when this occurs is when \eK has been processed as part of the match. In
|
||||
this situation, the output for the matched string is displayed from the
|
||||
starting character instead of from the match point, with circumflex characters
|
||||
The \fBstartchar\fP modifier requests that the starting character for the match
|
||||
be indicated, if it is different to the start of the matched string. The only
|
||||
time when this occurs is when \eK has been processed as part of the match. In
|
||||
this situation, the output for the matched string is displayed from the
|
||||
starting character instead of from the match point, with circumflex characters
|
||||
under the earlier characters. For example:
|
||||
.sp
|
||||
re> /abc\eKxyz/
|
||||
data> abcxyz\e=startchar
|
||||
0: abcxyz
|
||||
^^^
|
||||
^^^
|
||||
.sp
|
||||
Unlike \fBallusedtext\fP, the \fBstartchar\fP modifier can be used with JIT.
|
||||
However, these two modifiers are mutually exclusive.
|
||||
|
@ -856,7 +856,7 @@ The \fBjitstack\fP modifier provides a way of setting the maximum stack size
|
|||
that is used by the just-in-time optimization code. It is ignored if JIT
|
||||
optimization is not being used. The value is a number of kilobytes. Providing a
|
||||
stack that is larger than the default 32K is necessary only for very
|
||||
complicated patterns.
|
||||
complicated patterns.
|
||||
.
|
||||
.
|
||||
.SS "Setting match and recursion limits"
|
||||
|
@ -925,9 +925,9 @@ available for storing matching information. The default is 15.
|
|||
A value of zero is useful when testing the POSIX API because it causes
|
||||
\fBregexec()\fP to be called with a NULL capture vector. When not testing the
|
||||
POSIX API, a value of zero is used to cause
|
||||
\fBpcre2_match_data_create_from_pattern\fP to be called, in order to create a
|
||||
match block of exactly the right size for the pattern. (It is not possible to
|
||||
create a match block with a zero-length ovector; there is always one pair of
|
||||
\fBpcre2_match_data_create_from_pattern\fP to be called, in order to create a
|
||||
match block of exactly the right size for the pattern. (It is not possible to
|
||||
create a match block with a zero-length ovector; there is always one pair of
|
||||
offsets.)
|
||||
.
|
||||
.
|
||||
|
|
|
@ -169,137 +169,138 @@ COMMAND LINE OPTIONS
|
|||
DESCRIPTION
|
||||
|
||||
If pcre2test is given two filename arguments, it reads from the first
|
||||
and writes to the second. If it is given only one filename argument, it
|
||||
reads from that file and writes to stdout. Otherwise, it reads from
|
||||
stdin and writes to stdout, and prompts for each line of input, using
|
||||
"re>" to prompt for regular expression patterns, and "data>" to prompt
|
||||
for subject lines.
|
||||
and writes to the second. If the first name is "-", input is taken from
|
||||
the standard input. If pcre2test is given only one argument, it reads
|
||||
from that file and writes to stdout. Otherwise, it reads from stdin and
|
||||
writes to stdout. When the input is a terminal, it prompts for each
|
||||
line of input, using "re>" to prompt for regular expression patterns,
|
||||
and "data>" to prompt for subject lines.
|
||||
|
||||
When pcre2test is built, a configuration option can specify that it
|
||||
should be linked with the libreadline or libedit library. When this is
|
||||
done, if the input is from a terminal, it is read using the readline()
|
||||
When pcre2test is built, a configuration option can specify that it
|
||||
should be linked with the libreadline or libedit library. When this is
|
||||
done, if the input is from a terminal, it is read using the readline()
|
||||
function. This provides line-editing and history facilities. The output
|
||||
from the -help option states whether or not readline() will be used.
|
||||
|
||||
The program handles any number of tests, each of which consists of a
|
||||
set of input lines. Each set starts with a regular expression pattern,
|
||||
The program handles any number of tests, each of which consists of a
|
||||
set of input lines. Each set starts with a regular expression pattern,
|
||||
followed by any number of subject lines to be matched against that pat-
|
||||
tern. In between sets of test data, command lines that begin with a
|
||||
hash (#) character may appear. This file format, with some restric-
|
||||
tern. In between sets of test data, command lines that begin with a
|
||||
hash (#) character may appear. This file format, with some restric-
|
||||
tions, can also be processed by the perltest.pl script that is distrib-
|
||||
uted with PCRE2 as a means of checking that the behaviour of PCRE2 and
|
||||
uted with PCRE2 as a means of checking that the behaviour of PCRE2 and
|
||||
Perl is the same.
|
||||
|
||||
Each subject line is matched separately and independently. If you want
|
||||
Each subject line is matched separately and independently. If you want
|
||||
to do multi-line matches, you have to use the \n escape sequence (or \r
|
||||
or \r\n, etc., depending on the newline setting) in a single line of
|
||||
input to encode the newline sequences. There is no limit on the length
|
||||
of subject lines; the input buffer is automatically extended if it is
|
||||
too small. There is a replication feature that makes it possible to
|
||||
or \r\n, etc., depending on the newline setting) in a single line of
|
||||
input to encode the newline sequences. There is no limit on the length
|
||||
of subject lines; the input buffer is automatically extended if it is
|
||||
too small. There is a replication feature that makes it possible to
|
||||
generate long subject lines without having to supply them explicitly.
|
||||
|
||||
An empty line or the end of the file signals the end of the subject
|
||||
lines for a test, at which point a new pattern or command line is
|
||||
An empty line or the end of the file signals the end of the subject
|
||||
lines for a test, at which point a new pattern or command line is
|
||||
expected if there is still input to be read.
|
||||
|
||||
|
||||
COMMAND LINES
|
||||
|
||||
In between sets of test data, a line that begins with a hash (#) char-
|
||||
acter is interpreted as a command line. If the first character is fol-
|
||||
lowed by white space or an exclamation mark, the line is treated as a
|
||||
comment, and ignored. Otherwise, the following commands are recog-
|
||||
In between sets of test data, a line that begins with a hash (#) char-
|
||||
acter is interpreted as a command line. If the first character is fol-
|
||||
lowed by white space or an exclamation mark, the line is treated as a
|
||||
comment, and ignored. Otherwise, the following commands are recog-
|
||||
nized:
|
||||
|
||||
#forbid_utf
|
||||
|
||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and
|
||||
Subsequent patterns automatically have the PCRE2_NEVER_UTF and
|
||||
PCRE2_NEVER_UCP options set, which locks out the use of UTF and Unicode
|
||||
property features. This is a trigger guard that is used in test files
|
||||
to ensure that UTF/Unicode tests are not accidentally added to files
|
||||
that are used when UTF support is not included in the library. This
|
||||
effect can also be obtained by the use of #pattern; the difference is
|
||||
that #forbid_utf cannot be unset, and the automatic options are not
|
||||
property features. This is a trigger guard that is used in test files
|
||||
to ensure that UTF/Unicode tests are not accidentally added to files
|
||||
that are used when UTF support is not included in the library. This
|
||||
effect can also be obtained by the use of #pattern; the difference is
|
||||
that #forbid_utf cannot be unset, and the automatic options are not
|
||||
displayed in pattern information, to avoid cluttering up test output.
|
||||
|
||||
#pattern <modifier-list>
|
||||
|
||||
This command sets a default modifier list that applies to all subse-
|
||||
This command sets a default modifier list that applies to all subse-
|
||||
quent patterns. Modifiers on a pattern can change these settings.
|
||||
|
||||
#perltest
|
||||
|
||||
The appearance of this line causes all subsequent modifier settings to
|
||||
The appearance of this line causes all subsequent modifier settings to
|
||||
be checked for compatibility with the perltest.pl script, which is used
|
||||
to confirm that Perl gives the same results as PCRE2. Also, apart from
|
||||
comment lines, none of the other command lines are permitted, because
|
||||
they and many of the modifiers are specific to pcre2test, and should
|
||||
not be used in test files that are also processed by perltest.pl. The
|
||||
#perltest command helps detect tests that are accidentally put in the
|
||||
to confirm that Perl gives the same results as PCRE2. Also, apart from
|
||||
comment lines, none of the other command lines are permitted, because
|
||||
they and many of the modifiers are specific to pcre2test, and should
|
||||
not be used in test files that are also processed by perltest.pl. The
|
||||
#perltest command helps detect tests that are accidentally put in the
|
||||
wrong file.
|
||||
|
||||
#subject <modifier-list>
|
||||
|
||||
This command sets a default modifier list that applies to all subse-
|
||||
quent subject lines. Modifiers on a subject line can change these set-
|
||||
This command sets a default modifier list that applies to all subse-
|
||||
quent subject lines. Modifiers on a subject line can change these set-
|
||||
tings.
|
||||
|
||||
|
||||
MODIFIER SYNTAX
|
||||
|
||||
Modifier lists are used with both pattern and subject lines. Items in a
|
||||
list are separated by commas and optional white space. Some modifiers
|
||||
may be given for both patterns and subject lines, whereas others are
|
||||
valid for one or the other only. Each modifier has a long name, for
|
||||
list are separated by commas and optional white space. Some modifiers
|
||||
may be given for both patterns and subject lines, whereas others are
|
||||
valid for one or the other only. Each modifier has a long name, for
|
||||
example "anchored", and some of them must be followed by an equals sign
|
||||
and a value, for example, "offset=12". Modifiers that do not take val-
|
||||
ues may be preceded by a minus sign to turn off a previous default set-
|
||||
ting.
|
||||
|
||||
A few of the more common modifiers can also be specified as single let-
|
||||
ters, for example "i" for "caseless". In documentation, following the
|
||||
ters, for example "i" for "caseless". In documentation, following the
|
||||
Perl convention, these are written with a slash ("the /i modifier") for
|
||||
clarity. Abbreviated modifiers must all be concatenated in the first
|
||||
item of a modifier list. If the first item is not recognized as a long
|
||||
modifier name, it is interpreted as a sequence of these abbreviations.
|
||||
clarity. Abbreviated modifiers must all be concatenated in the first
|
||||
item of a modifier list. If the first item is not recognized as a long
|
||||
modifier name, it is interpreted as a sequence of these abbreviations.
|
||||
For example:
|
||||
|
||||
/abc/ig,newline=cr,jit=3
|
||||
|
||||
This is a pattern line whose modifier list starts with two one-letter
|
||||
modifiers (/i and /g). The lower-case abbreviated modifiers are the
|
||||
This is a pattern line whose modifier list starts with two one-letter
|
||||
modifiers (/i and /g). The lower-case abbreviated modifiers are the
|
||||
same as used in Perl.
|
||||
|
||||
|
||||
PATTERN SYNTAX
|
||||
|
||||
A pattern line must start with one of the following characters (common
|
||||
A pattern line must start with one of the following characters (common
|
||||
symbols, excluding pattern meta-characters):
|
||||
|
||||
/ ! " ' ` - = _ : ; , % & @ ~
|
||||
|
||||
This is interpreted as the pattern's delimiter. A regular expression
|
||||
may be continued over several input lines, in which case the newline
|
||||
This is interpreted as the pattern's delimiter. A regular expression
|
||||
may be continued over several input lines, in which case the newline
|
||||
characters are included within it. It is possible to include the delim-
|
||||
iter within the pattern by escaping it with a backslash, for example
|
||||
|
||||
/abc\/def/
|
||||
|
||||
If you do this, the escape and the delimiter form part of the pattern,
|
||||
If you do this, the escape and the delimiter form part of the pattern,
|
||||
but since the delimiters are all non-alphanumeric, this does not affect
|
||||
its interpretation. If the terminating delimiter is immediately fol-
|
||||
its interpretation. If the terminating delimiter is immediately fol-
|
||||
lowed by a backslash, for example,
|
||||
|
||||
/abc/\
|
||||
|
||||
then a backslash is added to the end of the pattern. This is done to
|
||||
provide a way of testing the error condition that arises if a pattern
|
||||
then a backslash is added to the end of the pattern. This is done to
|
||||
provide a way of testing the error condition that arises if a pattern
|
||||
finishes with a backslash, because
|
||||
|
||||
/abc\/
|
||||
|
||||
is interpreted as the first line of a pattern that starts with "abc/",
|
||||
causing pcre2test to read the next line as a continuation of the regu-
|
||||
is interpreted as the first line of a pattern that starts with "abc/",
|
||||
causing pcre2test to read the next line as a continuation of the regu-
|
||||
lar expression.
|
||||
|
||||
A pattern can be followed by a modifier list (details below).
|
||||
|
@ -307,7 +308,7 @@ PATTERN SYNTAX
|
|||
|
||||
SUBJECT LINE SYNTAX
|
||||
|
||||
Before each subject line is passed to pcre2_match() or
|
||||
Before each subject line is passed to pcre2_match() or
|
||||
pcre2_dfa_match(), leading and trailing white space is removed, and the
|
||||
line is scanned for backslash escapes. The following provide a means of
|
||||
encoding non-printing characters in a visible way:
|
||||
|
@ -327,23 +328,23 @@ SUBJECT LINE SYNTAX
|
|||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
|
||||
The use of \x{hh...} is not dependent on the use of the utf modifier on
|
||||
the pattern. It is recognized always. There may be any number of hexa-
|
||||
decimal digits inside the braces; invalid values provoke error mes-
|
||||
the pattern. It is recognized always. There may be any number of hexa-
|
||||
decimal digits inside the braces; invalid values provoke error mes-
|
||||
sages.
|
||||
|
||||
Note that \xhh specifies one byte rather than one character in UTF-8
|
||||
mode; this makes it possible to construct invalid UTF-8 sequences for
|
||||
testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
|
||||
character in UTF-8 mode, generating more than one byte if the value is
|
||||
greater than 127. When testing the 8-bit library not in UTF-8 mode,
|
||||
Note that \xhh specifies one byte rather than one character in UTF-8
|
||||
mode; this makes it possible to construct invalid UTF-8 sequences for
|
||||
testing purposes. On the other hand, \x{hh} is interpreted as a UTF-8
|
||||
character in UTF-8 mode, generating more than one byte if the value is
|
||||
greater than 127. When testing the 8-bit library not in UTF-8 mode,
|
||||
\x{hh} generates one byte for values less than 256, and causes an error
|
||||
for greater values.
|
||||
|
||||
In UTF-16 mode, all 4-digit \x{hhhh} values are accepted. This makes it
|
||||
possible to construct invalid UTF-16 sequences for testing purposes.
|
||||
|
||||
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
|
||||
makes it possible to construct invalid UTF-32 sequences for testing
|
||||
In UTF-32 mode, all 4- to 8-digit \x{...} values are accepted. This
|
||||
makes it possible to construct invalid UTF-32 sequences for testing
|
||||
purposes.
|
||||
|
||||
There is a special backslash sequence that specifies replication of one
|
||||
|
@ -351,38 +352,38 @@ SUBJECT LINE SYNTAX
|
|||
|
||||
\[<characters>]{<count>}
|
||||
|
||||
This makes it possible to test long strings without having to provide
|
||||
This makes it possible to test long strings without having to provide
|
||||
them as part of the file. For example:
|
||||
|
||||
\[abc]{4}
|
||||
|
||||
is converted to "abcabcabcabc". This feature does not support nesting.
|
||||
is converted to "abcabcabcabc". This feature does not support nesting.
|
||||
To include a closing square bracket in the characters, code it as \x5D.
|
||||
|
||||
A backslash followed by an equals sign marks the end of the subject
|
||||
A backslash followed by an equals sign marks the end of the subject
|
||||
string and the start of a modifier list. For example:
|
||||
|
||||
abc\=notbol,notempty
|
||||
|
||||
A backslash followed by any other non-alphanumeric character just
|
||||
A backslash followed by any other non-alphanumeric character just
|
||||
escapes that character. A backslash followed by anything else causes an
|
||||
error. However, if the very last character in the line is a backslash
|
||||
(and there is no modifier list), it is ignored. This gives a way of
|
||||
passing an empty line as data, since a real empty line terminates the
|
||||
error. However, if the very last character in the line is a backslash
|
||||
(and there is no modifier list), it is ignored. This gives a way of
|
||||
passing an empty line as data, since a real empty line terminates the
|
||||
data input.
|
||||
|
||||
|
||||
PATTERN MODIFIERS
|
||||
|
||||
There are three types of modifier that can appear in pattern lines, two
|
||||
of which may also be used in a #pattern command. A pattern's modifier
|
||||
of which may also be used in a #pattern command. A pattern's modifier
|
||||
list can add to or override default modifiers that were set by a previ-
|
||||
ous #pattern command.
|
||||
|
||||
Setting compilation options
|
||||
|
||||
The following modifiers set options for pcre2_compile(). The most com-
|
||||
mon ones have single-letter abbreviations. See pcre2api for a descrip-
|
||||
The following modifiers set options for pcre2_compile(). The most com-
|
||||
mon ones have single-letter abbreviations. See pcre2api for a descrip-
|
||||
tion of their effects.
|
||||
|
||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||
|
@ -408,13 +409,13 @@ PATTERN MODIFIERS
|
|||
utf set PCRE2_UTF
|
||||
|
||||
As well as turning on the PCRE2_UTF option, the utf modifier causes all
|
||||
non-printing characters in output strings to be printed using the
|
||||
\x{hh...} notation. Otherwise, those less than 0x100 are output in hex
|
||||
non-printing characters in output strings to be printed using the
|
||||
\x{hh...} notation. Otherwise, those less than 0x100 are output in hex
|
||||
without the curly brackets.
|
||||
|
||||
Setting compilation controls
|
||||
|
||||
The following modifiers affect the compilation process or request
|
||||
The following modifiers affect the compilation process or request
|
||||
information about the pattern:
|
||||
|
||||
bsr=[anycrlf|unicode] specify \R handling
|
||||
|
@ -424,6 +425,7 @@ PATTERN MODIFIERS
|
|||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
|
@ -440,55 +442,55 @@ PATTERN MODIFIERS
|
|||
|
||||
Newline and \R handling
|
||||
|
||||
The bsr modifier specifies what \R in a pattern should match. If it is
|
||||
set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
|
||||
"unicode", \R matches any Unicode newline sequence. The default is
|
||||
The bsr modifier specifies what \R in a pattern should match. If it is
|
||||
set to "anycrlf", \R matches CR, LF, or CRLF only. If it is set to
|
||||
"unicode", \R matches any Unicode newline sequence. The default is
|
||||
specified when PCRE2 is built, with the default default being Unicode.
|
||||
|
||||
The newline modifier specifies which characters are to be interpreted
|
||||
The newline modifier specifies which characters are to be interpreted
|
||||
as newlines, both in the pattern and (by default) in subject lines. The
|
||||
type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
|
||||
Information about a pattern
|
||||
|
||||
The debug modifier is a shorthand for info,fullbincode, requesting all
|
||||
The debug modifier is a shorthand for info,fullbincode, requesting all
|
||||
available information.
|
||||
|
||||
The bincode modifier causes a representation of the compiled code to be
|
||||
output after compilation. This information does not contain length and
|
||||
output after compilation. This information does not contain length and
|
||||
offset values, which ensures that the same output is generated for dif-
|
||||
ferent internal link sizes and different code unit widths. By using
|
||||
bincode, the same regression tests can be used in different environ-
|
||||
ferent internal link sizes and different code unit widths. By using
|
||||
bincode, the same regression tests can be used in different environ-
|
||||
ments.
|
||||
|
||||
The fullbincode modifier, by contrast, does include length and offset
|
||||
The fullbincode modifier, by contrast, does include length and offset
|
||||
values. This is used in a few special tests and is also useful for one-
|
||||
off tests.
|
||||
|
||||
The info modifier requests information about the compiled pattern
|
||||
(whether it is anchored, has a fixed first character, and so on). The
|
||||
The info modifier requests information about the compiled pattern
|
||||
(whether it is anchored, has a fixed first character, and so on). The
|
||||
information is obtained from the pcre2_pattern_info() function.
|
||||
|
||||
Specifying a pattern in hex
|
||||
|
||||
The hex modifier specifies that the characters of the pattern are to be
|
||||
interpreted as pairs of hexadecimal digits. White space is permitted
|
||||
interpreted as pairs of hexadecimal digits. White space is permitted
|
||||
between pairs. For example:
|
||||
|
||||
/ab 32 59/hex
|
||||
|
||||
This feature is provided as a way of creating patterns that contain
|
||||
This feature is provided as a way of creating patterns that contain
|
||||
binary zero characters. When hex is set, it implies use_length.
|
||||
|
||||
Using the pattern's length
|
||||
|
||||
By default, pcre2test passes patterns as zero-terminated strings to
|
||||
pcre2_compile(), giving the length as -1. If use_length is set, the
|
||||
By default, pcre2test passes patterns as zero-terminated strings to
|
||||
pcre2_compile(), giving the length as -1. If use_length is set, the
|
||||
length of the pattern is passed. This is implied if hex is set.
|
||||
|
||||
JIT compilation
|
||||
|
||||
The /jit modifier may optionally be followed by an equals sign and a
|
||||
The /jit modifier may optionally be followed by an equals sign and a
|
||||
number in the range 0 to 7:
|
||||
|
||||
0 disable JIT
|
||||
|
@ -499,17 +501,23 @@ PATTERN MODIFIERS
|
|||
6 use JIT for soft and hard partial match
|
||||
7 all three modes
|
||||
|
||||
If no number is given, 7 is assumed. If JIT compilation is successful,
|
||||
the compiled JIT code will automatically be used when pcre2_match() is
|
||||
run for the appropriate type of match, except when incompatible run-
|
||||
If no number is given, 7 is assumed. If JIT compilation is successful,
|
||||
the compiled JIT code will automatically be used when pcre2_match() is
|
||||
run for the appropriate type of match, except when incompatible run-
|
||||
time options are specified. For more details, see the pcre2jit documen-
|
||||
tation. See also the jitstack modifier below for a way of setting the
|
||||
tation. See also the jitstack modifier below for a way of setting the
|
||||
size of the JIT stack.
|
||||
|
||||
If the jitverify modifier is specified, information about the compiled
|
||||
pattern shows whether JIT compilation was or was not successful. If
|
||||
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
||||
tion is successful when jitverify is set, the text "(JIT)" is added to
|
||||
If the jitfast modifier is specified, matching is done using the JIT
|
||||
"fast path" interface (pcre2_jit_match()), which skips some of the san-
|
||||
ity checks that are done by pcre2_match(), and of course does not work
|
||||
when JIT is not supported. If jitfast is specified without jit, jit=7
|
||||
is assumed.
|
||||
|
||||
If the jitverify modifier is specified, information about the compiled
|
||||
pattern shows whether JIT compilation was or was not successful. If
|
||||
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
||||
tion is successful when jitverify is set, the text "(JIT)" is added to
|
||||
the first output line after a match or non match when JIT-compiled code
|
||||
was actually used.
|
||||
|
||||
|
@ -520,31 +528,33 @@ PATTERN MODIFIERS
|
|||
/pattern/locale=fr_FR
|
||||
|
||||
The given locale is set, pcre2_maketables() is called to build a set of
|
||||
character tables for the locale, and this is then passed to pcre2_com-
|
||||
pile() when compiling the regular expression. The same tables are used
|
||||
character tables for the locale, and this is then passed to pcre2_com-
|
||||
pile() when compiling the regular expression. The same tables are used
|
||||
when matching the following subject lines. The /locale modifier applies
|
||||
only to the pattern on which it appears, but can be given in a #pattern
|
||||
command if a default is needed. Setting a locale and alternate charac-
|
||||
command if a default is needed. Setting a locale and alternate charac-
|
||||
ter tables are mutually exclusive.
|
||||
|
||||
Showing pattern memory
|
||||
|
||||
The /memory modifier causes the size in bytes of the memory block used
|
||||
to hold the compiled pattern to be output. This does not include the
|
||||
size of the pcre2_code block; it is just the actual compiled data. If
|
||||
The /memory modifier causes the size in bytes of the memory block used
|
||||
to hold the compiled pattern to be output. This does not include the
|
||||
size of the pcre2_code block; it is just the actual compiled data. If
|
||||
the pattern is subsequently passed to the JIT compiler, the size of the
|
||||
JIT compiled code is also output.
|
||||
|
||||
Limiting nested parentheses
|
||||
|
||||
The parens_nest_limit modifier sets a limit on the depth of nested
|
||||
parentheses in a pattern. Breaching the limit causes a compilation
|
||||
error.
|
||||
The parens_nest_limit modifier sets a limit on the depth of nested
|
||||
parentheses in a pattern. Breaching the limit causes a compilation
|
||||
error. The default for the library is set when PCRE2 is built, but
|
||||
pcre2test sets its own default of 220, which is required for running
|
||||
the standard test suite.
|
||||
|
||||
Using the POSIX wrapper API
|
||||
|
||||
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
||||
per API rather than its native API. This supports only the 8-bit
|
||||
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
||||
per API rather than its native API. This supports only the 8-bit
|
||||
library. When the POSIX API is being used, the following pattern modi-
|
||||
fiers set options for the regcomp() function:
|
||||
|
||||
|
@ -556,25 +566,25 @@ PATTERN MODIFIERS
|
|||
ucp REG_UCP ) the POSIX standard
|
||||
utf REG_UTF8 )
|
||||
|
||||
The aftertext and allaftertext subject modifiers work as described
|
||||
The aftertext and allaftertext subject modifiers work as described
|
||||
below. All other modifiers cause an error.
|
||||
|
||||
Testing the stack guard feature
|
||||
|
||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
||||
pile_recursion_guard(), a function that is provided to enable stack
|
||||
availability to be checked during compilation (see the pcre2api docu-
|
||||
mentation for details). If the number specified by the modifier is
|
||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
||||
pile_recursion_guard(), a function that is provided to enable stack
|
||||
availability to be checked during compilation (see the pcre2api docu-
|
||||
mentation for details). If the number specified by the modifier is
|
||||
greater than zero, pcre2_set_compile_recursion_guard() is called to set
|
||||
up a callback from pcre2_compile() to a local function. The argument it
|
||||
is passed is the current nesting parenthesis depth; if this is greater
|
||||
up a callback from pcre2_compile() to a local function. The argument it
|
||||
is passed is the current nesting parenthesis depth; if this is greater
|
||||
than the value given by the modifier, non-zero is returned, causing the
|
||||
compilation to be aborted.
|
||||
|
||||
Using alternative character tables
|
||||
|
||||
The /tables modifier must be followed by a single digit. It causes a
|
||||
specific set of built-in character tables to be passed to pcre2_com-
|
||||
The /tables modifier must be followed by a single digit. It causes a
|
||||
specific set of built-in character tables to be passed to pcre2_com-
|
||||
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
||||
ent character tables. The digit specifies the tables as follows:
|
||||
|
||||
|
@ -583,25 +593,26 @@ PATTERN MODIFIERS
|
|||
pcre2_chartables.c.dist
|
||||
2 a set of tables defining ISO 8859 characters
|
||||
|
||||
In table 2, some characters whose codes are greater than 128 are iden-
|
||||
tified as letters, digits, spaces, etc. Setting alternate character
|
||||
In table 2, some characters whose codes are greater than 128 are iden-
|
||||
tified as letters, digits, spaces, etc. Setting alternate character
|
||||
tables and a locale are mutually exclusive.
|
||||
|
||||
Setting certain match controls
|
||||
|
||||
The following modifiers are really subject modifiers, and are described
|
||||
below. However, they may be included in a pattern's modifier list, in
|
||||
which case they are applied to every subject line that is processed
|
||||
below. However, they may be included in a pattern's modifier list, in
|
||||
which case they are applied to every subject line that is processed
|
||||
with that pattern. They do not affect the compilation process.
|
||||
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allusedtext show all consulted text
|
||||
/g global global matching
|
||||
mark show mark values
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allusedtext show all consulted text
|
||||
/g global global matching
|
||||
mark show mark values
|
||||
startchar show starting character when relevant
|
||||
|
||||
These modifiers may not appear in a #pattern command. If you want them
|
||||
These modifiers may not appear in a #pattern command. If you want them
|
||||
as defaults, set them in a #subject command.
|
||||
|
||||
|
||||
|
@ -612,7 +623,7 @@ SUBJECT MODIFIERS
|
|||
|
||||
Setting match options
|
||||
|
||||
The following modifiers set options for pcre2_match() or
|
||||
The following modifiers set options for pcre2_match() or
|
||||
pcre2_dfa_match(). See pcre2api for a description of their effects.
|
||||
|
||||
anchored set PCRE2_ANCHORED
|
||||
|
@ -626,20 +637,20 @@ SUBJECT MODIFIERS
|
|||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||
|
||||
The partial matching modifiers are provided with abbreviations because
|
||||
The partial matching modifiers are provided with abbreviations because
|
||||
they appear frequently in tests.
|
||||
|
||||
If the /posix modifier was present on the pattern, causing the POSIX
|
||||
If the /posix modifier was present on the pattern, causing the POSIX
|
||||
wrapper API to be used, the only option-setting modifiers that have any
|
||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||
Any other modifiers cause an error.
|
||||
|
||||
Setting match controls
|
||||
|
||||
The following modifiers affect the matching process or request addi-
|
||||
tional information. Some of them may also be specified on a pattern
|
||||
line (see above), in which case they apply to every subject line that
|
||||
The following modifiers affect the matching process or request addi-
|
||||
tional information. Some of them may also be specified on a pattern
|
||||
line (see above), in which case they apply to every subject line that
|
||||
is matched against that pattern.
|
||||
|
||||
aftertext show text after match
|
||||
|
@ -664,39 +675,56 @@ SUBJECT MODIFIERS
|
|||
offset=<n> set starting offset
|
||||
ovector=<n> set size of output vector
|
||||
recursion_limit=<n> set a recursion limit
|
||||
startchar show startchar when relevant
|
||||
|
||||
The effects of these modifiers are described in the following sections.
|
||||
FIXME: Give more examples.
|
||||
|
||||
Showing more text
|
||||
|
||||
The aftertext modifier requests that as well as outputting the sub-
|
||||
string that matched the entire pattern, pcre2test should in addition
|
||||
output the remainder of the subject string. This is useful for tests
|
||||
where the subject contains multiple copies of the same substring. The
|
||||
allaftertext modifier requests the same action for captured substrings
|
||||
as well as the main matched substring. In each case the remainder is
|
||||
output on the following line with a plus character following the cap-
|
||||
The aftertext modifier requests that as well as outputting the sub-
|
||||
string that matched the entire pattern, pcre2test should in addition
|
||||
output the remainder of the subject string. This is useful for tests
|
||||
where the subject contains multiple copies of the same substring. The
|
||||
allaftertext modifier requests the same action for captured substrings
|
||||
as well as the main matched substring. In each case the remainder is
|
||||
output on the following line with a plus character following the cap-
|
||||
ture number.
|
||||
|
||||
The allusedtext modifier requests that all the text that was consulted
|
||||
during a successful pattern match by the interpreter should be shown.
|
||||
This feature is not supported for JIT matching, and if requested with
|
||||
JIT it is ignored (with a warning message). Setting this modifier
|
||||
The allusedtext modifier requests that all the text that was consulted
|
||||
during a successful pattern match by the interpreter should be shown.
|
||||
This feature is not supported for JIT matching, and if requested with
|
||||
JIT it is ignored (with a warning message). Setting this modifier
|
||||
affects the output if there is a lookbehind at the start of a match, or
|
||||
a lookahead at the end, or if \K is used in the pattern. Characters
|
||||
that precede or follow the start and end of the actual match are indi-
|
||||
cated in the output by '<' or '>' characters underneath them. Here is
|
||||
a lookahead at the end, or if \K is used in the pattern. Characters
|
||||
that precede or follow the start and end of the actual match are indi-
|
||||
cated in the output by '<' or '>' characters underneath them. Here is
|
||||
an example:
|
||||
|
||||
/(?<=pqr)abc(?=xyz)/
|
||||
123pqrabcxyz456\=allusedtext
|
||||
re> /(?<=pqr)abc(?=xyz)/
|
||||
data> 123pqrabcxyz456\=allusedtext
|
||||
0: pqrabcxyz
|
||||
<<< >>>
|
||||
|
||||
This shows that the matched string is "abc", with the preceding and
|
||||
This shows that the matched string is "abc", with the preceding and
|
||||
following strings "pqr" and "xyz" also consulted during the match.
|
||||
|
||||
The startchar modifier requests that the starting character for the
|
||||
match be indicated, if it is different to the start of the matched
|
||||
string. The only time when this occurs is when \K has been processed as
|
||||
part of the match. In this situation, the output for the matched string
|
||||
is displayed from the starting character instead of from the match
|
||||
point, with circumflex characters under the earlier characters. For
|
||||
example:
|
||||
|
||||
re> /abc\Kxyz/
|
||||
data> abcxyz\=startchar
|
||||
0: abcxyz
|
||||
^^^
|
||||
|
||||
Unlike allusedtext, the startchar modifier can be used with JIT. How-
|
||||
ever, these two modifiers are mutually exclusive.
|
||||
|
||||
Showing the value of all capture groups
|
||||
|
||||
The allcaptures modifier requests that the values of all potential cap-
|
||||
|
@ -768,66 +796,70 @@ SUBJECT MODIFIERS
|
|||
|
||||
The jitstack modifier provides a way of setting the maximum stack size
|
||||
that is used by the just-in-time optimization code. It is ignored if
|
||||
JIT optimization is not being used. Providing a stack that is larger
|
||||
than the default 32K is necessary only for very complicated patterns.
|
||||
JIT optimization is not being used. The value is a number of kilobytes.
|
||||
Providing a stack that is larger than the default 32K is necessary only
|
||||
for very complicated patterns.
|
||||
|
||||
Setting match and recursion limits
|
||||
|
||||
The match_limit and recursion_limit modifiers set the appropriate lim-
|
||||
The match_limit and recursion_limit modifiers set the appropriate lim-
|
||||
its in the match context. These values are ignored when the find_limits
|
||||
modifier is specified.
|
||||
|
||||
Finding minimum limits
|
||||
|
||||
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
||||
several times, setting different values in the match context via
|
||||
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
||||
the minimum values for each parameter that allow pcre2_match() to com-
|
||||
If the find_limits modifier is present, pcre2test calls pcre2_match()
|
||||
several times, setting different values in the match context via
|
||||
pcre2_set_match_limit() and pcre2_set_recursion_limit() until it finds
|
||||
the minimum values for each parameter that allow pcre2_match() to com-
|
||||
plete without error.
|
||||
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching
|
||||
is being used, neither limit is relevant, and this modifier is ignored
|
||||
is being used, neither limit is relevant, and this modifier is ignored
|
||||
(with a warning message).
|
||||
|
||||
The match_limit number is a measure of the amount of backtracking that
|
||||
takes place, and learning the minimum value can be instructive. For
|
||||
most simple matches, the number is quite small, but for patterns with
|
||||
very large numbers of matching possibilities, it can become large very
|
||||
quickly with increasing length of subject string. The
|
||||
match_limit_recursion number is a measure of how much stack (or, if
|
||||
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
||||
The match_limit number is a measure of the amount of backtracking that
|
||||
takes place, and learning the minimum value can be instructive. For
|
||||
most simple matches, the number is quite small, but for patterns with
|
||||
very large numbers of matching possibilities, it can become large very
|
||||
quickly with increasing length of subject string. The
|
||||
match_limit_recursion number is a measure of how much stack (or, if
|
||||
PCRE2 is compiled with NO_RECURSE, how much heap) memory is needed to
|
||||
complete the match attempt.
|
||||
|
||||
Showing MARK names
|
||||
|
||||
|
||||
The mark modifier causes the names from backtracking control verbs that
|
||||
are returned from calls to pcre2_match() to be displayed. If a mark is
|
||||
returned for a match, non-match, or partial match, pcre2test shows it.
|
||||
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
||||
are returned from calls to pcre2_match() to be displayed. If a mark is
|
||||
returned for a match, non-match, or partial match, pcre2test shows it.
|
||||
For a match, it is on a line by itself, tagged with "MK:". Otherwise,
|
||||
it is added to the non-match message.
|
||||
|
||||
Showing memory usage
|
||||
|
||||
The memory modifier causes pcre2test to log all memory allocation and
|
||||
The memory modifier causes pcre2test to log all memory allocation and
|
||||
freeing calls that occur during a match operation.
|
||||
|
||||
Setting a starting offset
|
||||
|
||||
The offset modifier sets an offset in the subject string at which
|
||||
The offset modifier sets an offset in the subject string at which
|
||||
matching starts. Its value is a number of code units, not characters.
|
||||
|
||||
Setting the size of the output vector
|
||||
|
||||
The ovector modifier applies only to the subject line in which it
|
||||
appears, though of course it can also be used to set a default in a
|
||||
#subject command. It specifies the number of pairs of offsets that are
|
||||
The ovector modifier applies only to the subject line in which it
|
||||
appears, though of course it can also be used to set a default in a
|
||||
#subject command. It specifies the number of pairs of offsets that are
|
||||
available for storing matching information. The default is 15.
|
||||
|
||||
At least one pair of offsets is always created by pcre2_match_data_cre-
|
||||
ate(), for matching with PCRE2's native API, so a value of 0 is the
|
||||
same as 1. However a value of 0 is useful when testing the POSIX API
|
||||
because it causes regexec() to be called with a NULL capture vector.
|
||||
A value of zero is useful when testing the POSIX API because it causes
|
||||
regexec() to be called with a NULL capture vector. When not testing the
|
||||
POSIX API, a value of zero is used to cause pcre2_match_data_cre-
|
||||
ate_from_pattern to be called, in order to create a match block of
|
||||
exactly the right size for the pattern. (It is not possible to create a
|
||||
match block with a zero-length ovector; there is always one pair of
|
||||
offsets.)
|
||||
|
||||
|
||||
THE ALTERNATIVE MATCHING FUNCTION
|
||||
|
@ -1058,8 +1090,8 @@ NON-PRINTING CHARACTERS
|
|||
|
||||
SEE ALSO
|
||||
|
||||
pcre2(3), pcre16(3), pcre32(3), pcre2api(3), pcre2callout(3), pcre2jit,
|
||||
pcre2matching(3), pcre2partial(3), pcre2pattern(3), pcre2precompile(3).
|
||||
pcre2(3), pcre2api(3), pcre2callout(3), pcre2jit, pcre2matching(3),
|
||||
pcre2partial(3), pcre2pattern(3).
|
||||
|
||||
|
||||
AUTHOR
|
||||
|
@ -1071,5 +1103,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 11 October 2014
|
||||
Last updated: 02 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
|
|
24
perltest.sh
24
perltest.sh
|
@ -8,7 +8,7 @@
|
|||
#
|
||||
# The desired effect is achieved by making this a shell script that passes the
|
||||
# Perl script to Perl through a pipe. If the first argument is "-utf8", a
|
||||
# suitable prefix is set up.
|
||||
# suitable prefix is set up.
|
||||
#
|
||||
# The remaining arguments, if any, are passed to Perl. They are an input file
|
||||
# and an output file. If there is one argument, the output is written to
|
||||
|
@ -20,7 +20,7 @@ perl=perl
|
|||
prefix=''
|
||||
if [ $# > 0 -a "$1" = "-utf8" ] ; then
|
||||
prefix="use utf8; require Encode;"
|
||||
shift
|
||||
shift
|
||||
fi
|
||||
|
||||
|
||||
|
@ -28,23 +28,23 @@ fi
|
|||
# can be given identical input, except that input patterns can be followed only
|
||||
# by Perl's lower case modifiers and certain other pcre2test modifiers that are
|
||||
# either handled or ignored:
|
||||
#
|
||||
#
|
||||
# aftertext interpreted as "print $' afterwards"
|
||||
# afteralltext ignored
|
||||
# dupnames ignored (Perl always allows)
|
||||
# mark ignored
|
||||
# no_auto_possess ignored
|
||||
# no_start_optimize ignored
|
||||
# ucp sets Perl's /u modifier
|
||||
# utf invoke UTF-8 functionality
|
||||
#
|
||||
# no_start_optimize ignored
|
||||
# ucp sets Perl's /u modifier
|
||||
# utf invoke UTF-8 functionality
|
||||
#
|
||||
# The data lines must not have any pcre2test modifiers. They are processed as
|
||||
# Perl double-quoted strings, so if they contain " $ or @ characters, these
|
||||
# have to be escaped. For this reason, all such characters in the
|
||||
# Perl-compatible testinput1 and testinput4 files are escaped so that they can
|
||||
# be used for perltest as well as for pcre2test. The output from this script
|
||||
# should be the same as from pcre2test, apart from the initial identifying banner.
|
||||
#
|
||||
#
|
||||
# The other testinput files are not suitable for feeding to perltest.sh,
|
||||
# because they make use of the special modifiers that pcre2test uses for
|
||||
# testing features of PCRE2. Some of these files also contain malformed regular
|
||||
|
@ -90,11 +90,11 @@ if (@ARGV > 0)
|
|||
$infile = "INFILE";
|
||||
$interact = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
else
|
||||
{
|
||||
open(INFILE, "</dev/tty") || die "Failed to open /dev/tty\n";
|
||||
$infile = "INFILE";
|
||||
$interact = 1;
|
||||
$interact = 1;
|
||||
}
|
||||
|
||||
if (@ARGV > 1)
|
||||
|
@ -291,5 +291,5 @@ for (;;)
|
|||
|
||||
PERLEND
|
||||
) | $perl - $@
|
||||
|
||||
|
||||
# End
|
||||
|
|
|
@ -86,8 +86,7 @@ passed. Put these bits at the most significant end of the options word so
|
|||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x40000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x20000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
|
||||
/* Other options that can be passed to pcre2_compile(). They may affect
|
||||
compilation, JIT compilation, and/or interpretive execution. The following tags
|
||||
|
@ -95,7 +94,7 @@ indicate which:
|
|||
|
||||
C alters what is compiled
|
||||
J alters what JIT compiles
|
||||
E is inspected during pcre2_match() execution
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
||||
|
@ -103,24 +102,25 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_ALT_BSUX 0x00000002u /* C */
|
||||
#define PCRE2_AUTO_CALLOUT 0x00000004u /* C */
|
||||
#define PCRE2_CASELESS 0x00000008u /* C */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J E D */
|
||||
#define PCRE2_DOLLAR_ENDONLY 0x00000010u /* J M D */
|
||||
#define PCRE2_DOTALL 0x00000020u /* C */
|
||||
#define PCRE2_DUPNAMES 0x00000040u /* C */
|
||||
#define PCRE2_EXTENDED 0x00000080u /* C */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J E D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J E */
|
||||
#define PCRE2_FIRSTLINE 0x00000100u /* J M D */
|
||||
#define PCRE2_MATCH_UNSET_BACKREF 0x00000200u /* C J M */
|
||||
#define PCRE2_MULTILINE 0x00000400u /* C */
|
||||
#define PCRE2_NEVER_UCP 0x00000800u /* C */
|
||||
#define PCRE2_NEVER_UTF 0x00001000u /* C */
|
||||
#define PCRE2_NO_AUTO_CAPTURE 0x00002000u /* C */
|
||||
#define PCRE2_NO_AUTO_POSSESS 0x00004000u /* C */
|
||||
#define PCRE2_UCP 0x00008000u /* C J E D */
|
||||
#define PCRE2_UNGREEDY 0x00010000u /* C */
|
||||
#define PCRE2_UTF 0x00020000u /* C J E D */
|
||||
#define PCRE2_NO_START_OPTIMIZE 0x00008000u /* J M D */
|
||||
#define PCRE2_UCP 0x00010000u /* C J M D */
|
||||
#define PCRE2_UNGREEDY 0x00020000u /* C */
|
||||
#define PCRE2_UTF 0x00040000u /* C J M D */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
#define PCRE2_JIT 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
|
||||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
|
||||
|
@ -130,8 +130,8 @@ functions, so take care not to define synonyms by mistake. */
|
|||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u
|
||||
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
|
||||
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
|
||||
#define PCRE2_PARTIAL_SOFT 0x00000010u
|
||||
#define PCRE2_PARTIAL_HARD 0x00000020u
|
||||
|
||||
|
@ -140,9 +140,9 @@ functions, so take care not to define synonyms by mistake. */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* Newline and \R settings, for use in the compile and match contexts. The
|
||||
newline values must be kept in step with values set in config.h and both sets
|
||||
must all be greater than zero. */
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
||||
#define PCRE2_NEWLINE_CR 1
|
||||
#define PCRE2_NEWLINE_LF 2
|
||||
|
@ -193,32 +193,33 @@ must all be greater than zero. */
|
|||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match() */
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, and
|
||||
context functions. */
|
||||
|
||||
#define PCRE2_ERROR_BADCOUNT (-29)
|
||||
#define PCRE2_ERROR_BADENDIANNESS (-30)
|
||||
#define PCRE2_ERROR_BADLENGTH (-31)
|
||||
#define PCRE2_ERROR_BADMAGIC (-32)
|
||||
#define PCRE2_ERROR_BADMODE (-33)
|
||||
#define PCRE2_ERROR_BADOFFSET (-34)
|
||||
#define PCRE2_ERROR_BADOPTION (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_UMLIMIT (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_BADLENGTH (-30)
|
||||
#define PCRE2_ERROR_BADMAGIC (-31)
|
||||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-35)
|
||||
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-37)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-38)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-39)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-40)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-41)
|
||||
#define PCRE2_ERROR_INTERNAL (-42)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-43)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-45)
|
||||
#define PCRE2_ERROR_NOMEMORY (-46)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-47)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
|
||||
#define PCRE2_ERROR_NULL (-49)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-50)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-51)
|
||||
#define PCRE2_ERROR_UNSET (-52)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -257,8 +258,8 @@ must all be greater than zero. */
|
|||
#define PCRE2_CONFIG_PARENSLIMIT 7
|
||||
#define PCRE2_CONFIG_RECURSIONLIMIT 5
|
||||
#define PCRE2_CONFIG_STACKRECURSE 8
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 9
|
||||
#define PCRE2_CONFIG_UTF 10
|
||||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
@ -271,12 +272,14 @@ typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
|
|||
typedef const PCRE2_UCHAR16 *PCRE2_SPTR16;
|
||||
typedef const PCRE2_UCHAR32 *PCRE2_SPTR32;
|
||||
|
||||
/* Offsets in the pattern (for errors) and in the subject (after a match) are
|
||||
unsigned 32-bit numbers. We also define a value to indicate "unset" in the
|
||||
offset vector (ovector). */
|
||||
/* The PCRE2_SIZE type is used for all string lengths and offsets in PCRE2,
|
||||
including pattern offsets for errors and subject offsets after a match. We
|
||||
define special values to indicate zero-terminated strings and unset offsets in
|
||||
the offset vector (ovector). */
|
||||
|
||||
#define PCRE2_OFFSET PCRE2_UCHAR32
|
||||
#define PCRE2_UNSET (~(PCRE2_OFFSET)0)
|
||||
#define PCRE2_SIZE size_t
|
||||
#define PCRE2_ZERO_TERMINATED (~(PCRE2_SIZE)0)
|
||||
#define PCRE2_UNSET (~(PCRE2_SIZE)0)
|
||||
|
||||
/* Generic types for opaque structures and JIT callback functions. These
|
||||
declarations are defined in a macro that is expanded for each width later. */
|
||||
|
@ -311,22 +314,20 @@ versions are generated from this macro below. */
|
|||
|
||||
#define PCRE2_STRUCTURE_LIST \
|
||||
typedef struct pcre2_callout_block { \
|
||||
int version; /* Identifies version of block */ \
|
||||
uint32_t version; /* Identifies version of block */ \
|
||||
/* ------------------------ Version 0 ------------------------------- */ \
|
||||
uint32_t callout_number; /* Number compiled into pattern */ \
|
||||
PCRE2_OFFSET *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
size_t subject_length; /* The length of the subject */ \
|
||||
PCRE2_OFFSET start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_OFFSET current_position; /* Where we currently are in the subject */ \
|
||||
uint32_t capture_top; /* Max current capture */ \
|
||||
uint32_t capture_last; /* Most recently closed capture */ \
|
||||
void *callout_data; /* Data passed in with the call */ \
|
||||
/* ------------------- Added for Version 1 -------------------------- */ \
|
||||
PCRE2_OFFSET pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_OFFSET next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------- Added for Version 2 -------------------------- */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SIZE *offset_vector; /* The offset vector */ \
|
||||
PCRE2_SPTR mark; /* Pointer to current mark or NULL */ \
|
||||
PCRE2_SPTR subject; /* The subject being matched */ \
|
||||
PCRE2_SIZE subject_length; /* The length of the subject */ \
|
||||
PCRE2_SIZE start_match; /* Offset to start of this match attempt */ \
|
||||
PCRE2_SIZE current_position; /* Where we currently are in the subject */ \
|
||||
PCRE2_SIZE pattern_position; /* Offset to next item in the pattern */ \
|
||||
PCRE2_SIZE next_item_length; /* Length of next item in the pattern */ \
|
||||
/* ------------------------------------------------------------------ */ \
|
||||
} pcre2_callout_block;
|
||||
|
||||
|
@ -336,7 +337,7 @@ expanded for each width below. Start with functions that give general
|
|||
information. */
|
||||
|
||||
#define PCRE2_GENERAL_INFO_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_config(int, void *, size_t);
|
||||
PCRE2_EXP_DECL int pcre2_config(uint32_t, void *);
|
||||
|
||||
|
||||
/* Functions for manipulating contexts. */
|
||||
|
@ -346,7 +347,7 @@ PCRE2_EXP_DECL \
|
|||
pcre2_general_context *pcre2_general_context_copy(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_general_context *pcre2_general_context_create( \
|
||||
void *(*)(size_t, void *), \
|
||||
void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *); \
|
||||
PCRE2_EXP_DECL void pcre2_general_context_free(pcre2_general_context *);
|
||||
|
||||
|
@ -356,12 +357,10 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_compile_context *pcre2_compile_context_create(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_compile(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
||||
|
@ -373,18 +372,14 @@ PCRE2_EXP_DECL \
|
|||
PCRE2_EXP_DECL \
|
||||
pcre2_match_context *pcre2_match_context_create(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_context_free(pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_bsr_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_callout(pcre2_match_context *, \
|
||||
int (*)(pcre2_callout_block *), void *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_match_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline_match(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_limit(pcre2_match_context *, \
|
||||
uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
||||
pcre2_match_context *, void *(*)(size_t, void *), \
|
||||
pcre2_match_context *, void *(*)(PCRE2_SIZE, void *), \
|
||||
void (*)(void *, void *), void *);
|
||||
|
||||
|
||||
|
@ -392,8 +387,8 @@ PCRE2_EXP_DECL int pcre2_set_recursion_memory_management( \
|
|||
|
||||
#define PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, int, uint32_t, \
|
||||
int *, PCRE2_OFFSET *, pcre2_compile_context *); \
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR, PCRE2_SIZE, uint32_t, \
|
||||
int *, PCRE2_SIZE *, pcre2_compile_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_code_free(pcre2_code *);
|
||||
|
||||
|
||||
|
@ -408,65 +403,62 @@ PCRE2_EXP_DECL int pcre2_pattern_info(const pcre2_code *, uint32_t, \
|
|||
|
||||
#define PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
size_t); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_leftchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_rightchar(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_OFFSET pcre2_get_startchar(pcre2_match_data *);
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, int *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_match_data_free(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SPTR pcre2_get_mark(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL uint32_t pcre2_get_ovector_count(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *); \
|
||||
PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||
|
||||
|
||||
/* Convenience functions for handling matched substrings. */
|
||||
|
||||
#define PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, size_t); \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
int, PCRE2_UCHAR *, size_t); \
|
||||
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **); \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
int, PCRE2_UCHAR **); \
|
||||
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR); \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
int); \
|
||||
unsigned int, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
const pcre2_code *, PCRE2_SPTR); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_list_free(PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
||||
PCRE2_UCHAR ***, size_t **);
|
||||
PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t, \
|
||||
pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, int, PCRE2_OFFSET, uint32_t, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
pcre2_jit_stack *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \
|
||||
size_t, size_t); \
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *, \
|
||||
PCRE2_SIZE, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
||||
pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
@ -475,7 +467,7 @@ PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
|||
/* Other miscellaneous functions. */
|
||||
|
||||
#define PCRE2_OTHER_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, size_t); \
|
||||
PCRE2_EXP_DECL int pcre2_get_error_message(int, PCRE2_UCHAR *, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL \
|
||||
const uint8_t *pcre2_maketables(pcre2_general_context *); \
|
||||
|
||||
|
@ -532,17 +524,15 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_general_context_create PCRE2_SUFFIX(pcre2_general_context_create_)
|
||||
#define pcre2_general_context_free PCRE2_SUFFIX(pcre2_general_context_free_)
|
||||
#define pcre2_get_error_message PCRE2_SUFFIX(pcre2_get_error_message_)
|
||||
#define pcre2_get_leftchar PCRE2_SUFFIX(pcre2_get_leftchar_)
|
||||
#define pcre2_get_mark PCRE2_SUFFIX(pcre2_get_mark_)
|
||||
#define pcre2_get_ovector_pointer PCRE2_SUFFIX(pcre2_get_ovector_pointer_)
|
||||
#define pcre2_get_ovector_count PCRE2_SUFFIX(pcre2_get_ovector_count_)
|
||||
#define pcre2_get_rightchar PCRE2_SUFFIX(pcre2_get_rightchar_)
|
||||
#define pcre2_get_startchar PCRE2_SUFFIX(pcre2_get_startchar_)
|
||||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
|
@ -552,14 +542,12 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_match_data_create_from_pattern PCRE2_SUFFIX(pcre2_match_data_create_from_pattern_)
|
||||
#define pcre2_match_data_free PCRE2_SUFFIX(pcre2_match_data_free_)
|
||||
#define pcre2_pattern_info PCRE2_SUFFIX(pcre2_pattern_info_)
|
||||
#define pcre2_set_bsr_compile PCRE2_SUFFIX(pcre2_set_bsr_compile_)
|
||||
#define pcre2_set_bsr_match PCRE2_SUFFIX(pcre2_set_bsr_match_)
|
||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_newline_compile PCRE2_SUFFIX(pcre2_set_newline_compile_)
|
||||
#define pcre2_set_newline_match PCRE2_SUFFIX(pcre2_set_newline_match_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
|
@ -621,24 +609,27 @@ PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
|||
#undef PCRE2_OTHER_FUNCTIONS
|
||||
#undef PCRE2_TYPES_STRUCTURES_AND_FUNCTIONS
|
||||
|
||||
/* Re-define PCRE2_SUFFIX to use the external width value, if defined.
|
||||
Otherwise, undefine the other macros and make PCRE2_SUFFIX a no-op, to reduce
|
||||
confusion. */
|
||||
/* PCRE2_CODE_UNIT_WIDTH must be defined. If it is 8, 16, or 32, redefine
|
||||
PCRE2_SUFFIX to use it. If it is 0, undefine the other macros and make
|
||||
PCRE2_SUFFIX a no-op. Otherwise, generate an error. */
|
||||
|
||||
#undef PCRE2_SUFFIX
|
||||
#ifdef PCRE2_CODE_UNIT_WIDTH
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 16 && \
|
||||
PCRE2_CODE_UNIT_WIDTH != 32
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 8, 16, or 32
|
||||
#endif
|
||||
#ifndef PCRE2_CODE_UNIT_WIDTH
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be defined before including pcre2.h.
|
||||
#error Use 8, 16, or 32; or 0 for a multi-width application.
|
||||
#else /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 16 || \
|
||||
PCRE2_CODE_UNIT_WIDTH == 32
|
||||
#define PCRE2_SUFFIX(a) PCRE2_GLUE(a, PCRE2_CODE_UNIT_WIDTH)
|
||||
#else
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 0
|
||||
#undef PCRE2_JOIN
|
||||
#undef PCRE2_GLUE
|
||||
#define PCRE2_SUFFIX(a) a
|
||||
#else
|
||||
#error PCRE2_CODE_UNIT_WIDTH must be 0, 8, 16, or 32.
|
||||
#endif
|
||||
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH is defined */
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
|
|
|
@ -457,7 +457,7 @@ PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
|||
pcre2_jit_stack *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *);\
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_jit_stack *pcre2_jit_stack_alloc(pcre2_general_context *, \
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *, \
|
||||
PCRE2_SIZE, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
||||
pcre2_jit_callback, void *); \
|
||||
|
@ -531,8 +531,8 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_jit_compile PCRE2_SUFFIX(pcre2_jit_compile_)
|
||||
#define pcre2_jit_match PCRE2_SUFFIX(pcre2_jit_match_)
|
||||
#define pcre2_jit_free_unused_memory PCRE2_SUFFIX(pcre2_jit_free_unused_memory_)
|
||||
#define pcre2_jit_stack_alloc PCRE2_SUFFIX(pcre2_jit_stack_alloc_)
|
||||
#define pcre2_jit_stack_assign PCRE2_SUFFIX(pcre2_jit_stack_assign_)
|
||||
#define pcre2_jit_stack_create PCRE2_SUFFIX(pcre2_jit_stack_create_)
|
||||
#define pcre2_jit_stack_free PCRE2_SUFFIX(pcre2_jit_stack_free_)
|
||||
#define pcre2_maketables PCRE2_SUFFIX(pcre2_maketables_)
|
||||
#define pcre2_match_context_copy PCRE2_SUFFIX(pcre2_match_context_copy_)
|
||||
|
|
|
@ -304,8 +304,8 @@ static const short int escapes[] = {
|
|||
#else
|
||||
|
||||
/* This is the "abnormal" table for EBCDIC systems without UTF-8 support.
|
||||
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
|
||||
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
|
||||
It runs from 'a' to '9'. For some minimal testing of EBCDIC features, the code
|
||||
is sometimes compiled on an ASCII system. In this case, we must not use CHAR_a
|
||||
because it is defined as 'a', which of course picks up the ASCII value. */
|
||||
|
||||
#if 'a' == 0x81 /* Check for a real EBCDIC environment */
|
||||
|
@ -7786,7 +7786,7 @@ if (cb.hwm > cb.start_workspace)
|
|||
NULL to indicate that forward references have been filled in. */
|
||||
|
||||
if (cb.workspace_size > COMPILE_WORK_SIZE)
|
||||
ccontext->memctl.free((void *)cb.start_workspace,
|
||||
ccontext->memctl.free((void *)cb.start_workspace,
|
||||
ccontext->memctl.memory_data);
|
||||
cb.start_workspace = NULL;
|
||||
|
||||
|
|
|
@ -221,7 +221,7 @@ static const char match_error_texts[] =
|
|||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown or unset substring\0"
|
||||
"non-unique substring name\0"
|
||||
"non-unique substring name\0"
|
||||
"NULL argument passed\0"
|
||||
/* 50 */
|
||||
"nested recursion at the same subject position\0"
|
||||
|
|
|
@ -97,7 +97,7 @@ sljit_free_unused_memory_exec();
|
|||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_jit_stack * PCRE2_CALL_CONVENTION
|
||||
pcre2_jit_stack_alloc(pcre2_general_context *gcontext, size_t startsize,
|
||||
pcre2_jit_stack_create(pcre2_general_context *gcontext, size_t startsize,
|
||||
size_t maxsize)
|
||||
{
|
||||
#ifndef SUPPORT_JIT
|
||||
|
|
|
@ -854,7 +854,7 @@ static pcre2_jit_stack_8 *stack8;
|
|||
static pcre2_jit_stack_8 *getstack8(void)
|
||||
{
|
||||
if (!stack8)
|
||||
stack8 = pcre2_jit_stack_alloc_8(NULL, 1, 1024 * 1024);
|
||||
stack8 = pcre2_jit_stack_create_8(NULL, 1, 1024 * 1024);
|
||||
return stack8;
|
||||
}
|
||||
|
||||
|
@ -877,7 +877,7 @@ static pcre2_jit_stack_16 *stack16;
|
|||
static pcre2_jit_stack_16 *getstack16(void)
|
||||
{
|
||||
if (!stack16)
|
||||
stack16 = pcre2_jit_stack_alloc_16(NULL, 1, 1024 * 1024);
|
||||
stack16 = pcre2_jit_stack_create_16(NULL, 1, 1024 * 1024);
|
||||
return stack16;
|
||||
}
|
||||
|
||||
|
@ -900,7 +900,7 @@ static pcre2_jit_stack_32 *stack32;
|
|||
static pcre2_jit_stack_32 *getstack32(void)
|
||||
{
|
||||
if (!stack32)
|
||||
stack32 = pcre2_jit_stack_alloc_32(NULL, 1, 1024 * 1024);
|
||||
stack32 = pcre2_jit_stack_create_32(NULL, 1, 1024 * 1024);
|
||||
return stack32;
|
||||
}
|
||||
|
||||
|
|
|
@ -446,7 +446,7 @@ while (top > bot)
|
|||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL)
|
||||
if (firstptr == NULL)
|
||||
return (first == last)? (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
|
|
|
@ -3115,7 +3115,7 @@ for (fn = pattern_files; fn != NULL; fn = fn->next)
|
|||
|
||||
#ifdef SUPPORT_PCRE2GREP_JIT
|
||||
if (use_jit)
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 1024*1024);
|
||||
jit_stack = pcre2_jit_stack_create(NULL, 32*1024, 1024*1024);
|
||||
#endif
|
||||
|
||||
for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
|
||||
|
|
|
@ -88,7 +88,7 @@ that first, falling back to readline/readline.h. */
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/* Put the test for interactive input into a macro so that it can be changed if
|
||||
/* Put the test for interactive input into a macro so that it can be changed if
|
||||
required for different environments. */
|
||||
|
||||
#define INTERACTIVE(f) isatty(fileno(f))
|
||||
|
@ -822,13 +822,13 @@ are supported. */
|
|||
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||
(pcre2_jit_stack_32 *)i)
|
||||
|
||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d); \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d); \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d); \
|
||||
else \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d);
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
||||
|
||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
|
@ -1200,11 +1200,11 @@ the three different cases. */
|
|||
a = G(pcre2_jit_match_,BITTWO)(G(b,BITTWO),(G(PCRE2_SPTR,BITTWO))c,d,e,f, \
|
||||
G(g,BITTWO),G(h,BITTWO),(G(pcre2_jit_stack_,BITTWO) *)i)
|
||||
|
||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITONE)(b,c,d); \
|
||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITONE)(b,c,d); \
|
||||
else \
|
||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_alloc_,BITTWO)(b,c,d); \
|
||||
a = (PCRE2_JIT_STACK *)G(pcre2_jit_stack_create_,BITTWO)(b,c,d); \
|
||||
|
||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
|
@ -1447,8 +1447,8 @@ the three different cases. */
|
|||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,(pcre2_jit_stack_8 *)i) \
|
||||
a = pcre2_jit_match_8(G(b,8),(PCRE2_SPTR8)c,d,e,f,G(g,8),G(h,8), \
|
||||
(pcre2_jit_stack_8 *)i)
|
||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_8(b,c,d);
|
||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_8(b,c,d);
|
||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||
pcre2_jit_stack_assign_8(G(a,8),(pcre2_jit_callback_8)b,c);
|
||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_8((pcre2_jit_stack_8 *)a);
|
||||
|
@ -1526,8 +1526,8 @@ the three different cases. */
|
|||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
||||
a = pcre2_jit_match_16(G(b,16),(PCRE2_SPTR16)c,d,e,f,G(g,16),G(h,16), \
|
||||
(pcre2_jit_stack_16 *)i)
|
||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_16(b,c,d);
|
||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_16(b,c,d);
|
||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||
pcre2_jit_stack_assign_16(G(a,16),(pcre2_jit_callback_16)b,c);
|
||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_16((pcre2_jit_stack_16 *)a);
|
||||
|
@ -1605,8 +1605,8 @@ the three different cases. */
|
|||
#define PCRE2_JIT_MATCH(a,b,c,d,e,f,g,h,i) \
|
||||
a = pcre2_jit_match_32(G(b,32),(PCRE2_SPTR32)c,d,e,f,G(g,32),G(h,32), \
|
||||
(pcre2_jit_stack_32 *)i)
|
||||
#define PCRE2_JIT_STACK_ALLOC(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_alloc_32(b,c,d);
|
||||
#define PCRE2_JIT_STACK_CREATE(a,b,c,d) \
|
||||
a = (PCRE2_JIT_STACK *)pcre2_jit_stack_create_32(b,c,d);
|
||||
#define PCRE2_JIT_STACK_ASSIGN(a,b,c) \
|
||||
pcre2_jit_stack_assign_32(G(a,32),(pcre2_jit_callback_32)b,c);
|
||||
#define PCRE2_JIT_STACK_FREE(a) pcre2_jit_stack_free_32((pcre2_jit_stack_32 *)a);
|
||||
|
@ -3681,7 +3681,7 @@ if (!decode_modifiers(p, CTX_PAT, &pat_patctl, NULL)) return PR_SKIP;
|
|||
/* Assume full JIT compile for jitverify and/or jitfast if nothing else was
|
||||
specified. */
|
||||
|
||||
if (pat_patctl.jit == 0 &&
|
||||
if (pat_patctl.jit == 0 &&
|
||||
(pat_patctl.control & (CTL_JITVERIFY|CTL_JITFAST)) != 0)
|
||||
pat_patctl.jit = 7;
|
||||
utf = (pat_patctl.options & PCRE2_UTF) != 0;
|
||||
|
@ -3996,7 +3996,7 @@ for (;;)
|
|||
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||
dat_datctl.options, match_data, dat_context, jit_stack);
|
||||
else
|
||||
else
|
||||
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||
dat_datctl.options, match_data, dat_context);
|
||||
|
||||
|
@ -4637,7 +4637,7 @@ if (dat_datctl.jitstack != 0)
|
|||
if (dat_datctl.jitstack != jit_stack_size)
|
||||
{
|
||||
PCRE2_JIT_STACK_FREE(jit_stack);
|
||||
PCRE2_JIT_STACK_ALLOC(jit_stack, NULL, 1, dat_datctl.jitstack * 1024);
|
||||
PCRE2_JIT_STACK_CREATE(jit_stack, NULL, 1, dat_datctl.jitstack * 1024);
|
||||
jit_stack_size = dat_datctl.jitstack;
|
||||
}
|
||||
PCRE2_JIT_STACK_ASSIGN(compiled_code, jit_callback, jit_stack);
|
||||
|
@ -4690,10 +4690,10 @@ for (gmatched = 0;; gmatched++)
|
|||
PCRE2_SIZE ovecsave[2];
|
||||
|
||||
ovector = FLD(match_data, ovector);
|
||||
|
||||
|
||||
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
||||
stack callback function. */
|
||||
|
||||
|
||||
jit_was_used = (pat_patctl.control & CTL_JITFAST) != 0;
|
||||
|
||||
/* After the first time round a global loop, save the current ovector[0,1] so
|
||||
|
@ -4722,7 +4722,7 @@ for (gmatched = 0;; gmatched++)
|
|||
}
|
||||
if (dfa_workspace == NULL)
|
||||
dfa_workspace = (int *)malloc(DFA_WS_DIMENSION*sizeof(int));
|
||||
start_time = clock();
|
||||
start_time = clock();
|
||||
for (i = 0; i < timeitm; i++)
|
||||
{
|
||||
PCRE2_DFA_MATCH(capcount, compiled_code, pp, ulen,
|
||||
|
@ -4730,7 +4730,7 @@ for (gmatched = 0;; gmatched++)
|
|||
dat_context, dfa_workspace, DFA_WS_DIMENSION);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
else if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||
{
|
||||
start_time = clock();
|
||||
|
@ -4740,9 +4740,9 @@ for (gmatched = 0;; gmatched++)
|
|||
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
||||
dat_context, jit_stack);
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
start_time = clock();
|
||||
for (i = 0; i < timeitm; i++)
|
||||
|
@ -4751,7 +4751,7 @@ for (gmatched = 0;; gmatched++)
|
|||
dat_datctl.offset, dat_datctl.options | g_notempty, match_data,
|
||||
dat_context);
|
||||
}
|
||||
}
|
||||
}
|
||||
total_match_time += (time_taken = clock() - start_time);
|
||||
fprintf(outfile, "Match time %.4f milliseconds\n",
|
||||
(((double)time_taken * 1000.0) / (double)timeitm) /
|
||||
|
@ -4809,7 +4809,7 @@ for (gmatched = 0;; gmatched++)
|
|||
if ((pat_patctl.control & CTL_JITFAST) != 0)
|
||||
PCRE2_JIT_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||
dat_datctl.options | g_notempty, match_data, dat_context, jit_stack);
|
||||
else
|
||||
else
|
||||
PCRE2_MATCH(capcount, compiled_code, pp, ulen, dat_datctl.offset,
|
||||
dat_datctl.options | g_notempty, match_data, dat_context);
|
||||
if (capcount == 0)
|
||||
|
|
|
@ -245,6 +245,11 @@ Minimum match limit = 6
|
|||
0: aabbccddee (JIT)
|
||||
1: aa
|
||||
2: cc
|
||||
3: ee
|
||||
aabbccddee\=jitstack=1
|
||||
0: aabbccddee (JIT)
|
||||
1: aa
|
||||
2: cc
|
||||
3: ee
|
||||
|
||||
/(a+)*zz/
|
||||
|
|
Loading…
Reference in New Issue