Add pcre2_code_copy_with_tables().
This commit is contained in:
parent
43e541adda
commit
2aec84e37e
|
@ -181,6 +181,9 @@ wrong name.
|
|||
|
||||
27. In pcre2test, give some offset information for errors in hex patterns.
|
||||
|
||||
28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to
|
||||
pcre2test for testing it.
|
||||
|
||||
|
||||
Version 10.22 29-July-2016
|
||||
--------------------------
|
||||
|
|
|
@ -25,6 +25,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2.html \
|
||||
doc/html/pcre2_callout_enumerate.html \
|
||||
doc/html/pcre2_code_copy.html \
|
||||
doc/html/pcre2_code_copy_with_tables.html \
|
||||
doc/html/pcre2_code_free.html \
|
||||
doc/html/pcre2_compile.html \
|
||||
doc/html/pcre2_compile_context_copy.html \
|
||||
|
@ -107,6 +108,7 @@ dist_man_MANS = \
|
|||
doc/pcre2.3 \
|
||||
doc/pcre2_callout_enumerate.3 \
|
||||
doc/pcre2_code_copy.3 \
|
||||
doc/pcre2_code_copy_with_tables.3 \
|
||||
doc/pcre2_code_free.3 \
|
||||
doc/pcre2_compile.3 \
|
||||
doc/pcre2_compile_context_copy.3 \
|
||||
|
|
|
@ -174,7 +174,11 @@ can skip ahead to the CMake section.
|
|||
|
||||
(11) If you want to use the pcre2grep command, compile and link
|
||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||
need the pcre2posix library).
|
||||
need the pcre2posix library). If you have built the PCRE2 library with JIT
|
||||
support by defining SUPPORT_JIT in src/config.h, you can also define
|
||||
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
|
||||
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
|
||||
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
|
||||
|
||||
|
||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||
|
@ -389,4 +393,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
|||
recommended download site.
|
||||
|
||||
=============================
|
||||
Last Updated: 16 July 2015
|
||||
Last Updated: 13 October 2016
|
||||
|
|
|
@ -44,7 +44,7 @@ wrappers.
|
|||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
man page). These can be found in a library called libpcre2-posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
@ -58,8 +58,8 @@ renamed or pointed at by a link.
|
|||
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
||||
regex library installed on your system, as well as worrying about the regex.h
|
||||
header file (as mentioned above), you must also take care when linking programs
|
||||
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
|
||||
pick up the POSIX functions of the same name from the other library.
|
||||
to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
|
||||
may pick up the POSIX functions of the same name from the other library.
|
||||
|
||||
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
||||
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
||||
|
@ -204,13 +204,6 @@ library. They are also documented in the pcre2build man page.
|
|||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
the standard tests will fail, because the lines in the test files end with
|
||||
LF. Even if the files are edited to change the line endings, there are likely
|
||||
to be some failures. With --enable-newline-is-anycrlf or
|
||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
||||
failures.
|
||||
|
||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||
sequence. This is independent of the option specifying what PCRE2 considers
|
||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||
|
@ -253,13 +246,13 @@ library. They are also documented in the pcre2build man page.
|
|||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||
to different parts of the compiled pattern. In the 16-bit library,
|
||||
--with-link-size=3 is the same as --with-link-size=4, which (in both
|
||||
libraries) uses four-byte offsets. Increasing the internal link size reduces
|
||||
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
|
||||
link size setting is ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
|
@ -339,12 +332,23 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
Of course, the relevant libraries must be installed on your system.
|
||||
|
||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
||||
set by, for example:
|
||||
. The default starting size (in bytes) of the internal buffer used by pcre2grep
|
||||
can be set by, for example:
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
|
||||
The value must be a plain integer. The default is 20480.
|
||||
The value must be a plain integer. The default is 20480. The amount of memory
|
||||
used by pcre2grep is actually three times this number, to allow for "before"
|
||||
and "after" lines. If very long lines are encountered, the buffer is
|
||||
automatically enlarged, up to a fixed maximum size.
|
||||
|
||||
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||
example:
|
||||
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
|
||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||
whichever is the larger.
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
@ -369,6 +373,22 @@ library. They are also documented in the pcre2build man page.
|
|||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||
be built, but not installed. This contains a single function called
|
||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||
length of the string. When called, this function tries to compile the string
|
||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the string.
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||
be created. This is normally run under valgrind or used when PCRE2 is
|
||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||
outputs information about what it is doing. The input strings are specified by
|
||||
arguments: if an argument starts with "=" the rest of it is a literal input
|
||||
string. Otherwise, it is assumed to be a file name, and the contents of the
|
||||
file are the test string.
|
||||
|
||||
The "configure" script builds the following files for the basic C library:
|
||||
|
||||
. Makefile the makefile that builds the library
|
||||
|
@ -543,7 +563,7 @@ script creates the .txt and HTML forms of the documentation from the man pages.
|
|||
|
||||
|
||||
Testing PCRE2
|
||||
------------
|
||||
-------------
|
||||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||
|
@ -757,6 +777,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_xclass.c )
|
||||
|
||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||
|
||||
src/config.h.in template for config.h, when built by "configure"
|
||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||
|
@ -814,7 +835,7 @@ The distribution should contain the files listed below.
|
|||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
|
||||
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||
ltmain.sh file used to build a libtool script
|
||||
missing ) common stub for a few missing GNU programs while
|
||||
) installing, generated by automake
|
||||
|
@ -845,4 +866,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 01 April 2016
|
||||
Last updated: 01 November 2016
|
||||
|
|
|
@ -94,6 +94,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
||||
<td> Copy a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_code_copy_with_tables.html">pcre2_code_copy_with_tables</a></td>
|
||||
<td> Copy a compiled pattern and its character tables</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||
<td> Free a compiled pattern</td></tr>
|
||||
|
||||
|
|
|
@ -28,8 +28,9 @@ DESCRIPTION
|
|||
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||
any memory used by the JIT compiler. Without a subsequent call to
|
||||
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching. The
|
||||
yield of the function is NULL if <i>code</i> is NULL or if sufficient memory
|
||||
cannot be obtained.
|
||||
pointer to the character tables is copied, not the tables themselves (see
|
||||
<b>pcre2_code_copy_with_tables()</b>). The yield of the function is NULL if
|
||||
<i>code</i> is NULL or if sufficient memory cannot be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_code_copy_with_tables specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_code_copy_with_tables man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||
any memory used by the JIT compiler. Without a subsequent call to
|
||||
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching.
|
||||
Unlike <b>pcre2_code_copy()</b>, a separate copy of the character tables is also
|
||||
made, with the new code pointing to it. This memory will be automatically freed
|
||||
when <b>pcre2_code_free()</b> is called. The yield of the function is NULL if
|
||||
<i>code</i> is NULL or if sufficient memory cannot be obtained.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -26,8 +26,11 @@ SYNOPSIS
|
|||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets, in a compile context, the maximum length (in code units) of
|
||||
the pattern that can be compiled. The result is always zero.
|
||||
This function sets, in a compile context, the maximum text length (in code
|
||||
units) of the pattern that can be compiled. The result is always zero. If a
|
||||
longer pattern is passed to <b>pcre2_compile()</b> there is an immediate error
|
||||
return. The default is effectively unlimited, being the largest value a
|
||||
PCRE2_SIZE variable can hold.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -294,6 +294,9 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||
<br>
|
||||
|
@ -567,8 +570,9 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
|||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||
required. JIT compilation updates a pointer within the compiled code block, so
|
||||
a thread must gain unique write access to the pointer before calling
|
||||
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> can be used
|
||||
to obtain a private copy of the compiled code.
|
||||
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
||||
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
||||
compiled code.
|
||||
</P>
|
||||
<br><b>
|
||||
Context blocks
|
||||
|
@ -736,7 +740,8 @@ functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
|
|||
<br>
|
||||
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||
using up too much system stack when being compiled.
|
||||
using up too much system stack when being compiled. The limit applies to
|
||||
parentheses of all kinds, not just capturing parentheses.
|
||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||
<br>
|
||||
|
@ -1058,6 +1063,9 @@ zero.
|
|||
<br>
|
||||
<br>
|
||||
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
||||
|
@ -1079,9 +1087,22 @@ if the code has been processed by the JIT compiler (see
|
|||
<a href="#jitcompiling">below),</a>
|
||||
the JIT information cannot be copied (because it is position-dependent).
|
||||
The new copy can initially be used only for non-JIT matching, though it can be
|
||||
passed to <b>pcre2_jit_compile()</b> if required. The <b>pcre2_code_copy()</b>
|
||||
function provides a way for individual threads in a multithreaded application
|
||||
to acquire a private copy of shared compiled code.
|
||||
passed to <b>pcre2_jit_compile()</b> if required.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_code_copy()</b> function provides a way for individual threads in a
|
||||
multithreaded application to acquire a private copy of shared compiled code.
|
||||
However, it does not make a copy of the character tables used by the compiled
|
||||
pattern; the new pattern code points to the same tables as the original code.
|
||||
(See
|
||||
<a href="#localesupport">"Locale Support"</a>
|
||||
below for details of these character tables.) In many applications the same
|
||||
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||
are needed. The <b>pcre2_code_copy_with_tables()</b> function provides this facility.
|
||||
Copies of both the code and the tables are made, with the new code pointing to
|
||||
the new tables. The memory for the new tables is automatically freed when
|
||||
<b>pcre2_code_free()</b> is called for the new copy of the compiled code.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
|
@ -1122,6 +1143,13 @@ error has occurred. The values are not defined when compilation is successful
|
|||
and <b>pcre2_compile()</b> returns a non-NULL value.
|
||||
</P>
|
||||
<P>
|
||||
The value returned in <i>erroroffset</i> is an indication of where in the
|
||||
pattern the error occurred. It is not necessarily the furthest point in the
|
||||
pattern that was read. For example, after the error "lookbehind assertion is
|
||||
not fixed length", the error offset points to the start of the failing
|
||||
assertion.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
||||
message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
|
@ -1215,8 +1243,8 @@ recognized, exactly as in the rest of the pattern.
|
|||
PCRE2_AUTO_CALLOUT
|
||||
</pre>
|
||||
If this bit is set, <b>pcre2_compile()</b> automatically inserts callout items,
|
||||
all with number 255, before each pattern item. For discussion of the callout
|
||||
facility, see the
|
||||
all with number 255, before each pattern item, except immediately before or
|
||||
after a callout in the pattern. For discussion of the callout facility, see the
|
||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation.
|
||||
<pre>
|
||||
|
@ -3235,7 +3263,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 June 2016
|
||||
Last updated: 22 November 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -34,9 +34,10 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC19" href="#SEC19">INCLUDING DEBUGGING CODE</a>
|
||||
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
|
||||
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
|
||||
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
|
||||
<li><a name="TOC24" href="#SEC24">REVISION</a>
|
||||
<li><a name="TOC22" href="#SEC22">SUPPORT FOR FUZZERS</a>
|
||||
<li><a name="TOC23" href="#SEC23">SEE ALSO</a>
|
||||
<li><a name="TOC24" href="#SEC24">AUTHOR</a>
|
||||
<li><a name="TOC25" href="#SEC25">REVISION</a>
|
||||
</ul>
|
||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
||||
<P>
|
||||
|
@ -376,16 +377,19 @@ they are not.
|
|||
<P>
|
||||
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when it
|
||||
finds a match. The size of the buffer is controlled by a parameter whose
|
||||
default value is 20K. The buffer itself is three times this size, but because
|
||||
of the way it is used for holding "before" lines, the longest line that is
|
||||
guaranteed to be processable is the parameter size. You can change the default
|
||||
parameter value by adding, for example,
|
||||
finds a match. The starting size of the buffer is controlled by a parameter
|
||||
whose default value is 20K. The buffer itself is three times this size, but
|
||||
because of the way it is used for holding "before" lines, the longest line that
|
||||
is guaranteed to be processable is the parameter size. If a longer line is
|
||||
encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
|
||||
specified maximum size, whose default is 1M or the starting size, whichever is
|
||||
the larger. You can change the default parameter values by adding, for example,
|
||||
<pre>
|
||||
--with-pcre2grep-bufsize=50K
|
||||
--with-pcre2grep-bufsize=51200
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
</pre>
|
||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
|
||||
value by using --buffer-size on the command line.
|
||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
|
||||
these values by using --buffer-size and --max-buffer-size on the command line.
|
||||
</P>
|
||||
<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||
<P>
|
||||
|
@ -497,11 +501,32 @@ This cleans all coverage data including the generated coverage report. For more
|
|||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
|
||||
<P>
|
||||
There is a special option for use by people who want to run fuzzing tests on
|
||||
PCRE2:
|
||||
<pre>
|
||||
--enable-fuzz-support
|
||||
</pre>
|
||||
At present this applies only to the 8-bit library. If set, it causes an extra
|
||||
library called libpcre2-fuzzsupport.a to be built, but not installed. This
|
||||
contains a single function called LLVMFuzzerTestOneInput() whose arguments are
|
||||
a pointer to a string and the length of the string. When called, this function
|
||||
tries to compile the string as a pattern, and if that succeeds, to match it.
|
||||
This is done both with no options and with some random options bits that are
|
||||
generated from the string. Setting --enable-fuzz-support also causes a binary
|
||||
called <b>pcre2fuzzcheck</b> to be created. This is normally run under valgrind
|
||||
or used when PCRE2 is compiled with address sanitizing enabled. It calls the
|
||||
fuzzing function and outputs information about what it is doing. The input strings
|
||||
are specified by arguments: if an argument starts with "=" the rest of it is a
|
||||
literal input string. Otherwise, it is assumed to be a file name, and the
|
||||
contents of the file are the test string.
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -510,9 +535,9 @@ University Computing Service
|
|||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 April 2016
|
||||
Last updated: 01 November 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -57,11 +57,20 @@ two callout points:
|
|||
</pre>
|
||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
||||
automatically inserts callouts, all with number 255, before each item in the
|
||||
pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||
pattern except for immediately before or after a callout item in the pattern.
|
||||
For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||
<pre>
|
||||
A(?C3)B
|
||||
</pre>
|
||||
it is processed as if it were
|
||||
<pre>
|
||||
(?C255)A(?C3)B(?C255)
|
||||
</pre>
|
||||
Here is a more complicated example:
|
||||
<pre>
|
||||
A(\d{2}|--)
|
||||
</pre>
|
||||
it is processed as if it were
|
||||
With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were
|
||||
<br>
|
||||
<br>
|
||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||
|
@ -107,10 +116,10 @@ with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string
|
|||
No match
|
||||
</pre>
|
||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
||||
You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
|
||||
<b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). In this
|
||||
case, the output changes to this:
|
||||
(because it is being treated as a++) and therefore the callouts that would be
|
||||
taken for the backtracks do not occur. You can disable the auto-possessify
|
||||
feature by passing PCRE2_NO_AUTO_POSSESS to <b>pcre2_compile()</b>, or starting
|
||||
the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this:
|
||||
<pre>
|
||||
--->aaaa
|
||||
+0 ^ a+
|
||||
|
@ -235,8 +244,8 @@ Fields for numerical callouts
|
|||
<P>
|
||||
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
|
||||
contains the number of the callout, in the range 0-255. This is the number
|
||||
that follows (?C for manual callouts; it is 255 for automatically generated
|
||||
callouts.
|
||||
that follows (?C for callouts that are part of the pattern; it is 255 for
|
||||
automatically generated callouts.
|
||||
</P>
|
||||
<br><b>
|
||||
Fields for string callouts
|
||||
|
@ -310,10 +319,15 @@ the next item to be matched.
|
|||
</P>
|
||||
<P>
|
||||
The <i>next_item_length</i> field contains the length of the next item to be
|
||||
matched in the pattern string. When the callout immediately precedes an
|
||||
alternation bar, a closing parenthesis, or the end of the pattern, the length
|
||||
is zero. When the callout precedes an opening parenthesis, the length is that
|
||||
of the entire subpattern.
|
||||
processed in the pattern string. When the callout is at the end of the pattern,
|
||||
the length is zero. When the callout precedes an opening parenthesis, the
|
||||
length includes meta characters that follow the parenthesis. For example, in a
|
||||
callout before an assertion such as (?=ab) the length is 3. For an
|
||||
alternation bar or a closing parenthesis, the length is one, unless a closing
|
||||
parenthesis is followed by a quantifier, in which case its length is included.
|
||||
(This changed in release 10.23. In earlier releases, before an opening
|
||||
parenthesis the length was that of the entire subpattern, and before an
|
||||
alternation bar or a closing parenthesis the length was zero.)
|
||||
</P>
|
||||
<P>
|
||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||
|
@ -399,9 +413,9 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 23 March 2015
|
||||
Last updated: 29 September 2016
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -107,7 +107,7 @@ processed as anchored at the point where they are tested.
|
|||
one that is backtracked onto acts. For example, in the pattern
|
||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||
same as PCRE2, but there are examples where it differs.
|
||||
same as PCRE2, but there are cases where it differs.
|
||||
</P>
|
||||
<P>
|
||||
11. Most backtracking verbs in assertions have their normal actions. They are
|
||||
|
@ -123,7 +123,7 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
|
|||
13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
|
||||
names is not as general as Perl's. This is a consequence of the fact that PCRE2
|
||||
works internally just with numbers, using an external table to translate
|
||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b)B),
|
||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||
where the two capturing parentheses have the same number but different names,
|
||||
is not supported, and causes an error at compile time. If it were allowed, it
|
||||
would not be possible to distinguish which parentheses matched, because both
|
||||
|
@ -131,10 +131,11 @@ names map to capturing subpattern number 1. To avoid this confusing situation,
|
|||
an error is given at compile time.
|
||||
</P>
|
||||
<P>
|
||||
14. Perl recognizes comments in some places that PCRE2 does not, for example,
|
||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
||||
deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
|
||||
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
||||
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
||||
is set, Perl allowed white space between ( and ? though the latest Perls give
|
||||
an error (for a while it was just deprecated). There may still be some cases
|
||||
where Perl behaves differently.
|
||||
</P>
|
||||
<P>
|
||||
15. Perl, when in warning mode, gives warnings for character classes such as
|
||||
|
@ -161,42 +162,47 @@ each alternative branch of a lookbehind assertion can match a different length
|
|||
of string. Perl requires them all to have the same length.
|
||||
<br>
|
||||
<br>
|
||||
(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
|
||||
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
||||
in lookbehinds, provided that there is no possibility of referencing a
|
||||
non-unique number or name. Perl does not support backreferences in lookbehinds.
|
||||
<br>
|
||||
<br>
|
||||
(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
|
||||
meta-character matches only at the very end of the string.
|
||||
<br>
|
||||
<br>
|
||||
(c) A backslash followed by a letter with no special meaning is faulted. (Perl
|
||||
(d) A backslash followed by a letter with no special meaning is faulted. (Perl
|
||||
can be made to issue a warning.)
|
||||
<br>
|
||||
<br>
|
||||
(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
|
||||
(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
|
||||
inverted, that is, by default they are not greedy, but if followed by a
|
||||
question mark they are.
|
||||
<br>
|
||||
<br>
|
||||
(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
|
||||
(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
|
||||
only at the first matching position in the subject string.
|
||||
<br>
|
||||
<br>
|
||||
(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
|
||||
(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
|
||||
PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
|
||||
<br>
|
||||
<br>
|
||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||
(h) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||
by the PCRE2_BSR_ANYCRLF option.
|
||||
<br>
|
||||
<br>
|
||||
(h) The callout facility is PCRE2-specific.
|
||||
(i) The callout facility is PCRE2-specific.
|
||||
<br>
|
||||
<br>
|
||||
(i) The partial matching facility is PCRE2-specific.
|
||||
(j) The partial matching facility is PCRE2-specific.
|
||||
<br>
|
||||
<br>
|
||||
(j) The alternative matching function (<b>pcre2_dfa_match()</b> matches in a
|
||||
(k) The alternative matching function (<b>pcre2_dfa_match()</b>) matches in a
|
||||
different way and is not Perl-compatible.
|
||||
<br>
|
||||
<br>
|
||||
(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
|
||||
(l) PCRE2 recognizes some special sequences such as (*CR) at the start of
|
||||
a pattern that set overall options that cannot be changed within the pattern.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -214,9 +220,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 15 March 2015
|
||||
Last updated: 18 October 2016
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -80,11 +80,19 @@ span line boundaries. What defines a line boundary is controlled by the
|
|||
</P>
|
||||
<P>
|
||||
The amount of memory used for buffering files that are being scanned is
|
||||
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
|
||||
The default value for this parameter is specified when <b>pcre2grep</b> is
|
||||
built, with the default default being 20K. A block of memory three times this
|
||||
size is used (to allow for buffering "before" and "after" lines). An error
|
||||
occurs if a line overflows the buffer.
|
||||
controlled by parameters that can be set by the <b>--buffer-size</b> and
|
||||
<b>--max-buffer-size</b> options. The first of these sets the size of buffer
|
||||
that is obtained at the start of processing. If an input file contains very
|
||||
long lines, a larger buffer may be needed; this is handled by automatically
|
||||
extending the buffer, up to the limit specified by <b>--max-buffer-size</b>. The
|
||||
default values for these parameters are specified when <b>pcre2grep</b> is
|
||||
built, with the default defaults being 20K and 1M respectively. An error occurs
|
||||
if a line is too long and the buffer can no longer be expanded.
|
||||
</P>
|
||||
<P>
|
||||
The block of memory that is actually used is three times the "buffer size", to
|
||||
allow for buffering "before" and "after" lines. If the buffer size is too
|
||||
small, fewer than requested "before" and "after" lines may be output.
|
||||
</P>
|
||||
<P>
|
||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||
|
@ -155,12 +163,13 @@ processing of patterns and file names that start with hyphens.
|
|||
</P>
|
||||
<P>
|
||||
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context after each matching line. If file names
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
||||
guarantees to have up to 8K of following text available for context output.
|
||||
Output up to <i>number</i> lines of context after each matching line. Fewer
|
||||
lines are output if the next match or the end of the file is reached, or if the
|
||||
processing buffer size has been set too small. If file names and/or line
|
||||
numbers are being output, a hyphen separator is used instead of a colon for the
|
||||
context lines. A line containing "--" is output between each group of lines,
|
||||
unless they are in fact contiguous in the input file. The value of <i>number</i>
|
||||
is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>-a</b>, <b>--text</b>
|
||||
|
@ -169,12 +178,14 @@ Treat binary files as text. This is equivalent to
|
|||
</P>
|
||||
<P>
|
||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||
Output <i>number</i> lines of context before each matching line. If file names
|
||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
||||
colon for the context lines. A line containing "--" is output between each
|
||||
group of lines, unless they are in fact contiguous in the input file. The value
|
||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
||||
guarantees to have up to 8K of preceding text available for context output.
|
||||
Output up to <i>number</i> lines of context before each matching line. Fewer
|
||||
lines are output if the previous match or the start of the file is within
|
||||
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
||||
file names and/or line numbers are being output, a hyphen separator is used
|
||||
instead of a colon for the context lines. A line containing "--" is output
|
||||
between each group of lines, unless they are in fact contiguous in the input
|
||||
file. The value of <i>number</i> is expected to be relatively small. When
|
||||
<b>-c</b> is used, <b>-B</b> is ignored.
|
||||
</P>
|
||||
<P>
|
||||
<b>--binary-files=</b><i>word</i>
|
||||
|
@ -191,8 +202,9 @@ return code.
|
|||
</P>
|
||||
<P>
|
||||
<b>--buffer-size=</b><i>number</i>
|
||||
Set the parameter that controls how much memory is used for buffering files
|
||||
that are being scanned.
|
||||
Set the parameter that controls how much memory is obtained at the start of
|
||||
processing for buffering files that are being scanned. See also
|
||||
<b>--max-buffer-size</b> below.
|
||||
</P>
|
||||
<P>
|
||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
||||
|
@ -202,14 +214,16 @@ This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
|||
<P>
|
||||
<b>-c</b>, <b>--count</b>
|
||||
Do not output lines from the files that are being scanned; instead output the
|
||||
number of matches (or non-matches if <b>-v</b> is used) that would otherwise
|
||||
have caused lines to be shown. By default, this count is the same as the number
|
||||
of suppressed lines, but if the <b>-M</b> (multiline) option is used (without
|
||||
<b>-v</b>), there may be more suppressed lines than the number of matches.
|
||||
number of lines that would have been shown, either because they matched, or, if
|
||||
<b>-v</b> is set, because they failed to match. By default, this count is
|
||||
exactly the same as the number of lines that would have been output, but if the
|
||||
<b>-M</b> (multiline) option is used (without <b>-v</b>), there may be more
|
||||
suppressed lines than the count (that is, the number of matches).
|
||||
<br>
|
||||
<br>
|
||||
If no lines are selected, the number zero is output. If several files are
|
||||
being scanned, a count is output for each of them. However, if the
|
||||
being scanned, a count is output for each of them and the <b>-t</b> option can
|
||||
be used to cause a total to be output at the end. However, if the
|
||||
<b>--files-with-matches</b> option is also used, only those files whose counts
|
||||
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
||||
<b>-B</b>, and <b>-C</b> options are ignored.
|
||||
|
@ -232,11 +246,12 @@ just one, in order to colour them all.
|
|||
<br>
|
||||
<br>
|
||||
The colour that is used can be specified by setting the environment variable
|
||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
|
||||
string of two numbers, separated by a semicolon. They are copied directly into
|
||||
the control string for setting colour on a terminal, so it is your
|
||||
responsibility to ensure that they make sense. If neither of the environment
|
||||
variables is set, the default is "1;31", which gives red.
|
||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these is set,
|
||||
<b>pcre2grep</b> looks for GREP_COLOUR or GREP_COLOR. The value of the variable
|
||||
should be a string of two numbers, separated by a semicolon. They are copied
|
||||
directly into the control string for setting colour on a terminal, so it is
|
||||
your responsibility to ensure that they make sense. If neither of the
|
||||
environment variables is set, the default is "1;31", which gives red.
|
||||
</P>
|
||||
<P>
|
||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||
|
@ -321,18 +336,18 @@ files; it does not apply to patterns specified by any of the <b>--include</b> or
|
|||
</P>
|
||||
<P>
|
||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
||||
Read patterns from the file, one per line, and match them against
|
||||
each line of input. What constitutes a newline when reading the file is the
|
||||
operating system's default. The <b>--newline</b> option has no effect on this
|
||||
option. Trailing white space is removed from each line, and blank lines are
|
||||
ignored. An empty file contains no patterns and therefore matches nothing. See
|
||||
also the comments about multiple patterns versus a single pattern with
|
||||
alternatives in the description of <b>-e</b> above.
|
||||
Read patterns from the file, one per line, and match them against each line of
|
||||
input. What constitutes a newline when reading the file is the operating
|
||||
system's default. The <b>--newline</b> option has no effect on this option.
|
||||
Trailing white space is removed from each line, and blank lines are ignored. An
|
||||
empty file contains no patterns and therefore matches nothing. See also the
|
||||
comments about multiple patterns versus a single pattern with alternatives in
|
||||
the description of <b>-e</b> above.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given more than once, all the specified files are
|
||||
read. A data line is output if any of the patterns match it. A file name can
|
||||
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
||||
If this option is given more than once, all the specified files are read. A
|
||||
data line is output if any of the patterns match it. A file name can be given
|
||||
as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
||||
specified on the command line using <b>-e</b> may also be present; they are
|
||||
tested before the file's patterns. However, no other pattern is taken from the
|
||||
command line; all arguments are treated as the names of paths to be searched.
|
||||
|
@ -502,22 +517,24 @@ There are no short forms for these options. The default settings are specified
|
|||
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||
</P>
|
||||
<P>
|
||||
<b>--max-buffer-size=</b><i>number</i>
|
||||
This limits the expansion of the processing buffer, whose initial size can be
|
||||
set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
|
||||
smaller than the starting buffer size.
|
||||
</P>
|
||||
<P>
|
||||
<b>-M</b>, <b>--multiline</b>
|
||||
Allow patterns to match more than one line. When this option is given, patterns
|
||||
may usefully contain literal newline characters and internal occurrences of ^
|
||||
and $ characters. The output for a successful match may consist of more than
|
||||
one line. The first is the line in which the match started, and the last is the
|
||||
line in which the match ended. If the matched string ends with a newline
|
||||
sequence the output ends at the end of that line.
|
||||
<br>
|
||||
<br>
|
||||
When this option is set, the PCRE2 library is called in "multiline" mode. This
|
||||
allows a matched string to extend past the end of a line and continue on one or
|
||||
more subsequent lines. However, <b>pcre2grep</b> still processes the input line
|
||||
by line. Once a match has been handled, scanning restarts at the beginning of
|
||||
the next line, just as it does when <b>-M</b> is not present. This means that it
|
||||
is possible for the second or subsequent lines in a multiline match to be
|
||||
output again as part of another match.
|
||||
Allow patterns to match more than one line. When this option is set, the PCRE2
|
||||
library is called in "multiline" mode. This allows a matched string to extend
|
||||
past the end of a line and continue on one or more subsequent lines. Patterns
|
||||
used with <b>-M</b> may usefully contain literal newline characters and internal
|
||||
occurrences of ^ and $ characters. The output for a successful match may
|
||||
consist of more than one line. The first line is the line in which the match
|
||||
started, and the last line is the line in which the match ended. If the matched
|
||||
string ends with a newline sequence, the output ends at the end of that line.
|
||||
If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
|
||||
match has been handled, scanning restarts at the beginning of the line after
|
||||
the one in which the match ended.
|
||||
<br>
|
||||
<br>
|
||||
The newline sequence that separates multiple lines must be matched as part of
|
||||
|
@ -533,11 +550,8 @@ well as possibly handling a two-character newline sequence.
|
|||
<br>
|
||||
<br>
|
||||
There is a limit to the number of lines that can be matched, imposed by the way
|
||||
that <b>pcre2grep</b> buffers the input file as it scans it. However,
|
||||
<b>pcre2grep</b> ensures that at least 8K characters or the rest of the file
|
||||
(whichever is the shorter) are available for forward matching, and similarly
|
||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
||||
are guaranteed to be available for lookbehind assertions. The <b>-M</b> option
|
||||
that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
|
||||
large processing buffer, this should not be a problem, but the <b>-M</b> option
|
||||
does not work when input is read line by line (see <b>--line-buffered</b>.)
|
||||
</P>
|
||||
<P>
|
||||
|
@ -585,12 +599,13 @@ It should never be needed in normal use.
|
|||
Show only the part of the line that matched a pattern instead of the whole
|
||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||
<b>-C</b> options are ignored. If there is more than one match in a line, each
|
||||
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
|
||||
sense of the match to find non-matching lines), no output is generated, but the
|
||||
return code is set appropriately. If the matched portion of the line is empty,
|
||||
nothing is output unless the file name or line number are being printed, in
|
||||
which case they are shown on an otherwise empty line. This option is mutually
|
||||
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||
of them is shown separately, on a separate line of output. If <b>-o</b> is
|
||||
combined with <b>-v</b> (invert the sense of the match to find non-matching
|
||||
lines), no output is generated, but the return code is set appropriately. If
|
||||
the matched portion of the line is empty, nothing is output unless the file
|
||||
name or line number are being printed, in which case they are shown on an
|
||||
otherwise empty line. This option is mutually exclusive with
|
||||
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
||||
|
@ -604,10 +619,11 @@ capturing parentheses do not exist in the pattern, or were not set in the
|
|||
match, nothing is output unless the file name or line number are being output.
|
||||
<br>
|
||||
<br>
|
||||
If this option is given multiple times, multiple substrings are output, in the
|
||||
order the options are given. For example, -o3 -o1 -o3 causes the substrings
|
||||
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
|
||||
default, there is no separator (but see the next option).
|
||||
If this option is given multiple times, multiple substrings are output for each
|
||||
match, in the order the options are given, and all on one line. For example,
|
||||
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator (but see the next
|
||||
option).
|
||||
</P>
|
||||
<P>
|
||||
<b>--om-separator</b>=<i>text</i>
|
||||
|
@ -638,6 +654,18 @@ quietly skipped. However, the return code is still 2, even if matches were
|
|||
found in other files.
|
||||
</P>
|
||||
<P>
|
||||
<b>-t</b>, <b>--total-count</b>
|
||||
This option is useful when scanning more than one file. If used on its own,
|
||||
<b>-t</b> suppresses all output except for a grand total number of matching
|
||||
lines (or non-matching lines if <b>-v</b> is used) in all the files. If <b>-t</b>
|
||||
is used with <b>-c</b>, a grand total is output except when the previous output
|
||||
is just one line. In other words, it is not output when just one file's count
|
||||
is listed. If file names are being output, the grand total is preceded by
|
||||
"TOTAL:". Otherwise, it appears as just another number. The <b>-t</b> option is
|
||||
ignored when used with <b>-L</b> (list files without matches), because the grand
|
||||
total would always be zero.
|
||||
</P>
|
||||
<P>
|
||||
<b>-u</b>, <b>--utf-8</b>
|
||||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||
|
@ -665,11 +693,12 @@ specified by any of the <b>--include</b> or <b>--exclude</b> options.
|
|||
<P>
|
||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||
Force the patterns to be anchored (each must start matching at the beginning of
|
||||
a line) and in addition, require them to match entire lines. This is equivalent
|
||||
to having ^ and $ characters at the start and end of each alternative top-level
|
||||
branch in every pattern. This option applies only to the patterns that are
|
||||
matched against the contents of files; it does not apply to patterns specified
|
||||
by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||
a line) and in addition, require them to match entire lines. In multiline mode
|
||||
the match may be more than one line. This is equivalent to having \A and \Z
|
||||
characters at the start and end of each alternative top-level branch in every
|
||||
pattern. This option applies only to the patterns that are matched against the
|
||||
contents of files; it does not apply to patterns specified by any of the
|
||||
<b>--include</b> or <b>--exclude</b> options.
|
||||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||
<P>
|
||||
|
@ -831,7 +860,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 19 June 2016
|
||||
Last updated: 31 October 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -61,14 +61,10 @@ The maximum length of a lookbehind assertion is 65535 characters.
|
|||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
order to limit the amount of system stack used at compile time. The limit can
|
||||
be specified when PCRE2 is built; the default is 250.
|
||||
</P>
|
||||
<P>
|
||||
There is a limit to the number of forward references to subsequent subpatterns
|
||||
of around 200,000. Repeated forward references with fixed upper limits, for
|
||||
example, (?2){0,100} when subpattern number 2 is to the right, are included in
|
||||
the count. There is no limit to the number of backward references.
|
||||
order to limit the amount of system stack used at compile time. The default
|
||||
limit can be specified when PCRE2 is built; the default default is 250. An
|
||||
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
||||
set the limit in a compile context.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of name for a named subpattern is 32 code units, and the
|
||||
|
@ -76,7 +72,12 @@ maximum number of named subpatterns is 10000.
|
|||
</P>
|
||||
<P>
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||
32-bit libraries.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a string argument to a callout is the largest number a
|
||||
32-bit unsigned integer can hold.
|
||||
</P>
|
||||
<br><b>
|
||||
AUTHOR
|
||||
|
@ -93,9 +94,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 05 November 2015
|
||||
Last updated: 26 October 2016
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -379,32 +379,31 @@ case letter, it is converted to upper case. Then bit 6 of the character (hex
|
|||
40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A),
|
||||
but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If the
|
||||
code unit following \c has a value less than 32 or greater than 126, a
|
||||
compile-time error occurs. This locks out non-printable ASCII characters in all
|
||||
modes.
|
||||
compile-time error occurs.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t
|
||||
generate the appropriate EBCDIC code values. The \c escape is processed
|
||||
as specified for Perl in the <b>perlebcdic</b> document. The only characters
|
||||
that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
|
||||
other character provokes a compile-time error. The sequence \@ encodes
|
||||
character code 0; the letters (in either case) encode characters 1-26 (hex 01
|
||||
to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and
|
||||
\? becomes either 255 (hex FF) or 95 (hex 5F).
|
||||
other character provokes a compile-time error. The sequence \c@ encodes
|
||||
character code 0; after \c the letters (in either case) encode characters 1-26
|
||||
(hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
|
||||
1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
|
||||
</P>
|
||||
<P>
|
||||
Thus, apart from \?, these escapes generate the same character code values as
|
||||
Thus, apart from \c?, these escapes generate the same character code values as
|
||||
they do in an ASCII environment, though the meanings of the values mostly
|
||||
differ. For example, \G always generates code value 7, which is BEL in ASCII
|
||||
differ. For example, \cG always generates code value 7, which is BEL in ASCII
|
||||
but DEL in EBCDIC.
|
||||
</P>
|
||||
<P>
|
||||
The sequence \? generates DEL (127, hex 7F) in an ASCII environment, but
|
||||
The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but
|
||||
because 127 is not a control character in EBCDIC, Perl makes it generate the
|
||||
APC character. Unfortunately, there are several variants of EBCDIC. In most of
|
||||
them the APC character has the value 255 (hex FF), but in the one Perl calls
|
||||
POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
|
||||
values, PCRE2 makes \? generate 95; otherwise it generates 255.
|
||||
values, PCRE2 makes \c? generate 95; otherwise it generates 255.
|
||||
</P>
|
||||
<P>
|
||||
After \0 up to two further octal digits are read. If there are fewer than two
|
||||
|
@ -526,9 +525,9 @@ by code point, as described in the previous section.
|
|||
Absolute and relative back references
|
||||
</b><br>
|
||||
<P>
|
||||
The sequence \g followed by an unsigned or a negative number, optionally
|
||||
enclosed in braces, is an absolute or relative back reference. A named back
|
||||
reference can be coded as \g{name}. Back references are discussed
|
||||
The sequence \g followed by a signed or unsigned number, optionally enclosed
|
||||
in braces, is an absolute or relative back reference. A named back reference
|
||||
can be coded as \g{name}. Back references are discussed
|
||||
<a href="#backreferences">later,</a>
|
||||
following the discussion of
|
||||
<a href="#subpattern">parenthesized subpatterns.</a>
|
||||
|
@ -1326,13 +1325,32 @@ whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A
|
|||
class such as [^a] always matches one of these characters.
|
||||
</P>
|
||||
<P>
|
||||
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,
|
||||
\V, \w, and \W may appear in a character class, and add the characters that
|
||||
they match to the class. For example, [\dABCDEF] matches any hexadecimal
|
||||
digit. In UTF modes, the PCRE2_UCP option affects the meanings of \d, \s, \w
|
||||
and their upper case partners, just as it does when they appear outside a
|
||||
character class, as described in the section entitled
|
||||
<a href="#genericchartypes">"Generic character types"</a>
|
||||
above. The escape sequence \b has a different meaning inside a character
|
||||
class; it matches the backspace character. The sequences \B, \N, \R, and \X
|
||||
are not special inside a character class. Like any other unrecognized escape
|
||||
sequences, they cause an error.
|
||||
</P>
|
||||
<P>
|
||||
The minus (hyphen) character can be used to specify a range of characters in a
|
||||
character class. For example, [d-m] matches any letter between d and m,
|
||||
inclusive. If a minus character is required in a class, it must be escaped with
|
||||
a backslash or appear in a position where it cannot be interpreted as
|
||||
indicating a range, typically as the first or last character in the class, or
|
||||
immediately after a range. For example, [b-d-z] matches letters in the range b
|
||||
to d, a hyphen character, or z.
|
||||
indicating a range, typically as the first or last character in the class,
|
||||
or immediately after a range. For example, [b-d-z] matches letters in the range
|
||||
b to d, a hyphen character, or z.
|
||||
</P>
|
||||
<P>
|
||||
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
||||
below) or a character type escape such as \d, but gives a warning in its
|
||||
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
||||
warning, an error is given in these cases.
|
||||
</P>
|
||||
<P>
|
||||
It is not possible to have the literal character "]" as the end character of a
|
||||
|
@ -1344,12 +1362,6 @@ followed by two other characters. The octal or hexadecimal representation of
|
|||
"]" can also be used to end a range.
|
||||
</P>
|
||||
<P>
|
||||
An error is generated if a POSIX character class (see below) or an escape
|
||||
sequence other than one that defines a single character appears at a point
|
||||
where a range ending character is expected. For example, [z-\xff] is valid,
|
||||
but [A-\d] and [A-[:digit:]] are not.
|
||||
</P>
|
||||
<P>
|
||||
Ranges normally include all code points between the start and end characters,
|
||||
inclusive. They can also be used for code points specified numerically, for
|
||||
example [\000-\037]. Ranges can include any characters that are valid for the
|
||||
|
@ -1372,19 +1384,6 @@ tables for a French locale are in use, [\xc8-\xcb] matches accented E
|
|||
characters in both cases.
|
||||
</P>
|
||||
<P>
|
||||
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,
|
||||
\V, \w, and \W may appear in a character class, and add the characters that
|
||||
they match to the class. For example, [\dABCDEF] matches any hexadecimal
|
||||
digit. In UTF modes, the PCRE2_UCP option affects the meanings of \d, \s, \w
|
||||
and their upper case partners, just as it does when they appear outside a
|
||||
character class, as described in the section entitled
|
||||
<a href="#genericchartypes">"Generic character types"</a>
|
||||
above. The escape sequence \b has a different meaning inside a character
|
||||
class; it matches the backspace character. The sequences \B, \N, \R, and \X
|
||||
are not special inside a character class. Like any other unrecognized escape
|
||||
sequences, they cause an error.
|
||||
</P>
|
||||
<P>
|
||||
A circumflex can conveniently be used with the upper case character types to
|
||||
specify a more restricted set of characters than the matching lower case type.
|
||||
For example, the class [^\W_] matches any letter or digit, but not underscore,
|
||||
|
@ -1552,13 +1551,8 @@ respectively.
|
|||
<P>
|
||||
When one of these option changes occurs at top level (that is, not inside
|
||||
subpattern parentheses), the change applies to the remainder of the pattern
|
||||
that follows. If the change is placed right at the start of a pattern, PCRE2
|
||||
extracts it into the global options (and it will therefore show up in data
|
||||
extracted by the <b>pcre2_pattern_info()</b> function).
|
||||
</P>
|
||||
<P>
|
||||
An option change within a subpattern (see below for a description of
|
||||
subpatterns) affects only that part of the subpattern that follows it, so
|
||||
that follows. An option change within a subpattern (see below for a description
|
||||
of subpatterns) affects only that part of the subpattern that follows it, so
|
||||
<pre>
|
||||
(a(?i)b)c
|
||||
</pre>
|
||||
|
@ -2093,9 +2087,9 @@ subpattern is possible using named parentheses (see below).
|
|||
</P>
|
||||
<P>
|
||||
Another way of avoiding the ambiguity inherent in the use of digits following a
|
||||
backslash is to use the \g escape sequence. This escape must be followed by an
|
||||
unsigned number or a negative number, optionally enclosed in braces. These
|
||||
examples are all identical:
|
||||
backslash is to use the \g escape sequence. This escape must be followed by a
|
||||
signed or unsigned number, optionally enclosed in braces. These examples are
|
||||
all identical:
|
||||
<pre>
|
||||
(ring), \1
|
||||
(ring), \g1
|
||||
|
@ -2103,8 +2097,7 @@ examples are all identical:
|
|||
</pre>
|
||||
An unsigned number specifies an absolute reference without the ambiguity that
|
||||
is present in the older syntax. It is also useful when literal digits follow
|
||||
the reference. A negative number is a relative reference. Consider this
|
||||
example:
|
||||
the reference. A signed number is a relative reference. Consider this example:
|
||||
<pre>
|
||||
(abc(def)ghi)\g{-1}
|
||||
</pre>
|
||||
|
@ -2115,6 +2108,11 @@ can be helpful in long patterns, and also in patterns that are created by
|
|||
joining together fragments that contain references within themselves.
|
||||
</P>
|
||||
<P>
|
||||
The sequence \g{+1} is a reference to the next capturing subpattern. This kind
|
||||
of forward reference can be useful in patterns that repeat. Perl does not
|
||||
support the use of + in this way.
|
||||
</P>
|
||||
<P>
|
||||
A back reference matches whatever actually matched the capturing subpattern in
|
||||
the current subject string, rather than anything matching the subpattern
|
||||
itself (see
|
||||
|
@ -2214,6 +2212,14 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not
|
|||
always, does do capturing in negative assertions.)
|
||||
</P>
|
||||
<P>
|
||||
WARNING: If a positive assertion containing one or more capturing subpatterns
|
||||
succeeds, but failure to match later in the pattern causes backtracking over
|
||||
this assertion, the captures within the assertion are reset only if no higher
|
||||
numbered captures are already set. This is, unfortunately, a fundamental
|
||||
limitation of the current implementation; it may get removed in a future
|
||||
reworking.
|
||||
</P>
|
||||
<P>
|
||||
For compatibility with Perl, most assertion subpatterns may be repeated; though
|
||||
it makes no sense to assert the same thing several times, the side effect of
|
||||
capturing parentheses may occasionally be useful. However, an assertion that
|
||||
|
@ -2310,18 +2316,31 @@ match. If there are insufficient characters before the current position, the
|
|||
assertion fails.
|
||||
</P>
|
||||
<P>
|
||||
In a UTF mode, PCRE2 does not allow the \C escape (which matches a single code
|
||||
unit even in a UTF mode) to appear in lookbehind assertions, because it makes
|
||||
it impossible to calculate the length of the lookbehind. The \X and \R
|
||||
escapes, which can match different numbers of code units, are also not
|
||||
permitted.
|
||||
In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which matches a
|
||||
single code unit even in a UTF mode) to appear in lookbehind assertions,
|
||||
because it makes it impossible to calculate the length of the lookbehind. The
|
||||
\X and \R escapes, which can match different numbers of code units, are never
|
||||
permitted in lookbehinds.
|
||||
</P>
|
||||
<P>
|
||||
<a href="#subpatternsassubroutines">"Subroutine"</a>
|
||||
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
|
||||
as the subpattern matches a fixed-length string.
|
||||
<a href="#recursion">Recursion,</a>
|
||||
however, is not supported.
|
||||
as the subpattern matches a fixed-length string. However,
|
||||
<a href="#recursion">recursion,</a>
|
||||
that is, a "subroutine" call into a group that is already active,
|
||||
is not supported.
|
||||
</P>
|
||||
<P>
|
||||
Perl does not support back references in lookbehinds. PCRE2 does support them,
|
||||
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
|
||||
must not be set, there must be no use of (?| in the pattern (it creates
|
||||
duplicate subpattern numbers), and if the back reference is by name, the name
|
||||
must be unique. Of course, the referenced subpattern must itself be of fixed
|
||||
length. The following pattern matches words containing at least two characters
|
||||
that begin and end with the same character:
|
||||
<pre>
|
||||
\b(\w)\w++(?<=\1)
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
Possessive quantifiers can be used in conjunction with lookbehind assertions to
|
||||
|
@ -2459,7 +2478,9 @@ Checking for a used subpattern by name
|
|||
<P>
|
||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized.
|
||||
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
||||
however, that undelimited names consisting of the letter R followed by digits
|
||||
are ambiguous (see the following section).
|
||||
</P>
|
||||
<P>
|
||||
Rewriting the above example to use a named subpattern gives this:
|
||||
|
@ -2474,30 +2495,52 @@ matched.
|
|||
Checking for pattern recursion
|
||||
</b><br>
|
||||
<P>
|
||||
If the condition is the string (R), and there is no subpattern with the name R,
|
||||
the condition is true if a recursive call to the whole pattern or any
|
||||
subpattern has been made. If digits or a name preceded by ampersand follow the
|
||||
letter R, for example:
|
||||
"Recursion" in this sense refers to any subroutine-like call from one part of
|
||||
the pattern to another, whether or not it is actually recursive. See the
|
||||
sections entitled
|
||||
<a href="#recursion">"Recursive patterns"</a>
|
||||
and
|
||||
<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
|
||||
below for details of recursion and subpattern calls.
|
||||
</P>
|
||||
<P>
|
||||
If a condition is the string (R), and there is no subpattern with the name R,
|
||||
the condition is true if matching is currently in a recursion or subroutine
|
||||
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
||||
there is no subpattern with that name, the condition is true if the most recent
|
||||
call is into a subpattern with the given number, which must exist somewhere in
|
||||
the overall pattern. This is a contrived example that is equivalent to a+b:
|
||||
<pre>
|
||||
(?(R3)...) or (?(R&name)...)
|
||||
((?(R1)a+|(?1)b))
|
||||
</pre>
|
||||
the condition is true if the most recent recursion is into a subpattern whose
|
||||
number or name is given. This condition does not check the entire recursion
|
||||
stack. If the name used in a condition of this kind is a duplicate, the test is
|
||||
applied to all subpatterns of the same name, and is true if any one of them is
|
||||
the most recent recursion.
|
||||
However, in both cases, if there is a subpattern with a matching name, the
|
||||
condition tests for its being set, as described in the section above, instead
|
||||
of testing for recursion. For example, creating a group with the name R1 by
|
||||
adding (?<R1>) to the above pattern completely changes its meaning.
|
||||
</P>
|
||||
<P>
|
||||
If a name preceded by ampersand follows the letter R, for example:
|
||||
<pre>
|
||||
(?(R&name)...)
|
||||
</pre>
|
||||
the condition is true if the most recent recursion is into a subpattern of that
|
||||
name (which must exist within the pattern).
|
||||
</P>
|
||||
<P>
|
||||
This condition does not check the entire recursion stack. It tests only the
|
||||
current level. If the name used in a condition of this kind is a duplicate, the
|
||||
test is applied to all subpatterns of the same name, and is true if any one of
|
||||
them is the most recent recursion.
|
||||
</P>
|
||||
<P>
|
||||
At "top level", all these recursion test conditions are false.
|
||||
<a href="#recursion">The syntax for recursive patterns</a>
|
||||
is described below.
|
||||
<a name="subdefine"></a></P>
|
||||
<br><b>
|
||||
Defining subpatterns for use by reference only
|
||||
</b><br>
|
||||
<P>
|
||||
If the condition is the string (DEFINE), and there is no subpattern with the
|
||||
name DEFINE, the condition is always false. In this case, there may be only one
|
||||
If the condition is the string (DEFINE), the condition is always false, even if
|
||||
there is a group with the name DEFINE. In this case, there may be only one
|
||||
alternative in the subpattern. It is always skipped if control reaches this
|
||||
point in the pattern; the idea of DEFINE is that it can be used to define
|
||||
subroutines that can be referenced from elsewhere. (The use of
|
||||
|
@ -2965,12 +3008,22 @@ depending on whether or not a name is present.
|
|||
By default, for compatibility with Perl, a name is any sequence of characters
|
||||
that does not include a closing parenthesis. The name is not processed in
|
||||
any way, and it is not possible to include a closing parenthesis in the name.
|
||||
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
||||
is applied to verb names and only an unescaped closing parenthesis terminates
|
||||
the name. A closing parenthesis can be included in a name either as \) or
|
||||
between \Q and \E. If the PCRE2_EXTENDED option is set, unescaped whitespace
|
||||
in verb names is skipped and #-comments are recognized, exactly as in the rest
|
||||
of the pattern.
|
||||
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
||||
is no longer Perl-compatible.
|
||||
</P>
|
||||
<P>
|
||||
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
||||
and only an unescaped closing parenthesis terminates the name. However, the
|
||||
only backslash items that are permitted are \Q, \E, and sequences such as
|
||||
\x{100} that define character code points. Character type escapes such as \d
|
||||
are faulted.
|
||||
</P>
|
||||
<P>
|
||||
A closing parenthesis can be included in a name either as \) or between \Q
|
||||
and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
||||
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
||||
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
||||
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||
|
@ -3393,7 +3446,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 20 June 2016
|
||||
Last updated: 23 October 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -492,6 +492,9 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||
\n reference by number (can be ambiguous)
|
||||
\gn reference by number
|
||||
\g{n} reference by number
|
||||
\g+n relative reference by number (PCRE2 extension)
|
||||
\g-n relative reference by number
|
||||
\g{+n} relative reference by number (PCRE2 extension)
|
||||
\g{-n} relative reference by number
|
||||
\k<name> reference by name (Perl)
|
||||
\k'name' reference by name (Perl)
|
||||
|
@ -530,14 +533,17 @@ Each top-level branch of a look behind must be of a fixed length.
|
|||
(?(-n) relative reference condition
|
||||
(?(<name>) named reference condition (Perl)
|
||||
(?('name') named reference condition (Perl)
|
||||
(?(name) named reference condition (PCRE2)
|
||||
(?(name) named reference condition (PCRE2, deprecated)
|
||||
(?(R) overall recursion condition
|
||||
(?(Rn) specific group recursion condition
|
||||
(?(R&name) specific recursion condition
|
||||
(?(Rn) specific numbered group recursion condition
|
||||
(?(R&name) specific named group recursion condition
|
||||
(?(DEFINE) define subpattern for reference
|
||||
(?(VERSION[>]=n.m) test PCRE2 version
|
||||
(?(assert) assertion condition
|
||||
</PRE>
|
||||
</pre>
|
||||
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||
conditions or recursion tests. Such a condition is interpreted as a reference
|
||||
condition if the relevant named group exists.
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||
<P>
|
||||
|
@ -589,9 +595,9 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 28 September 2016
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -615,6 +615,7 @@ about the pattern:
|
|||
pushcopy push a copy onto the stack
|
||||
stackguard=<number> test the stackguard feature
|
||||
tables=[0|1|2] select internal tables
|
||||
use_length do not zero-terminate the pattern
|
||||
utf8_input treat input as UTF-8
|
||||
</pre>
|
||||
The effects of these modifiers are described in the following sections.
|
||||
|
@ -698,6 +699,18 @@ testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses
|
|||
default values).
|
||||
</P>
|
||||
<br><b>
|
||||
Specifying the pattern's length
|
||||
</b><br>
|
||||
<P>
|
||||
By default, patterns are passed to the compiling functions as zero-terminated
|
||||
strings. When using the POSIX wrapper API, there is no other option. However,
|
||||
when using PCRE2's native API, patterns can be passed by length instead of
|
||||
being zero-terminated. The <b>use_length</b> modifier causes this to happen.
|
||||
Using a length happens automatically (whether or not <b>use_length</b> is set)
|
||||
when <b>hex</b> is set, because patterns specified in hexadecimal may contain
|
||||
binary zeros.
|
||||
</P>
|
||||
<br><b>
|
||||
Specifying pattern characters in hexadecimal
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -720,10 +733,10 @@ the delimiter within a substring. The <b>hex</b> and <b>expand</b> modifiers are
|
|||
mutually exclusive.
|
||||
</P>
|
||||
<P>
|
||||
By default, <b>pcre2test</b> passes patterns as zero-terminated strings to
|
||||
<b>pcre2_compile()</b>, giving the length as PCRE2_ZERO_TERMINATED. However, for
|
||||
patterns specified with the <b>hex</b> modifier, the actual length of the
|
||||
pattern is passed.
|
||||
The POSIX API cannot be used with patterns specified in hexadecimal because
|
||||
they may contain binary zeros, which conflicts with <b>regcomp()</b>'s
|
||||
requirement for a zero-terminated string. Such patterns are always passed to
|
||||
<b>pcre2_compile()</b> as a string with a length, not as zero-terminated.
|
||||
</P>
|
||||
<br><b>
|
||||
Specifying wide characters in 16-bit and 32-bit modes
|
||||
|
@ -1753,7 +1766,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 02 August 2016
|
||||
Last updated: 04 November 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -94,6 +94,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
||||
<td> Copy a compiled pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_code_copy_with_tables.html">pcre2_code_copy_with_tables</a></td>
|
||||
<td> Copy a compiled pattern and its character tables</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||
<td> Free a compiled pattern</td></tr>
|
||||
|
||||
|
|
411
doc/pcre2.txt
411
doc/pcre2.txt
|
@ -379,6 +379,8 @@ PCRE2 NATIVE API AUXILIARY FUNCTIONS
|
|||
|
||||
pcre2_code *pcre2_code_copy(const pcre2_code *code);
|
||||
|
||||
pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code);
|
||||
|
||||
int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer,
|
||||
PCRE2_SIZE bufflen);
|
||||
|
||||
|
@ -626,8 +628,8 @@ MULTITHREADING
|
|||
similar logic is required. JIT compilation updates a pointer within the
|
||||
compiled code block, so a thread must gain unique write access to the
|
||||
pointer before calling pcre2_jit_compile(). Alternatively,
|
||||
pcre2_code_copy() can be used to obtain a private copy of the compiled
|
||||
code.
|
||||
pcre2_code_copy() or pcre2_code_copy_with_tables() can be used to
|
||||
obtain a private copy of the compiled code.
|
||||
|
||||
Context blocks
|
||||
|
||||
|
@ -789,7 +791,9 @@ PCRE2 CONTEXTS
|
|||
|
||||
This parameter adjusts the limit, set when PCRE2 is built (default 250),
|
||||
on the depth of parenthesis nesting in a pattern. This limit stops
|
||||
rogue patterns using up too much system stack when being compiled.
|
||||
rogue patterns using up too much system stack when being compiled. The
|
||||
limit applies to parentheses of all kinds, not just capturing parenthe-
|
||||
ses.
|
||||
|
||||
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard_function)(uint32_t, void *), void *user_data);
|
||||
|
@ -1102,6 +1106,8 @@ COMPILING A PATTERN
|
|||
|
||||
pcre2_code *pcre2_code_copy(const pcre2_code *code);
|
||||
|
||||
pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *code);
|
||||
|
||||
The pcre2_compile() function compiles a pattern into an internal form.
|
||||
The pattern is defined by a pointer to a string of code units and a
|
||||
length. If the pattern is zero-terminated, the length can be specified
|
||||
|
@ -1120,10 +1126,21 @@ COMPILING A PATTERN
|
|||
However, if the code has been processed by the JIT compiler (see
|
||||
below), the JIT information cannot be copied (because it is position-
|
||||
dependent). The new copy can initially be used only for non-JIT match-
|
||||
ing, though it can be passed to pcre2_jit_compile() if required. The
|
||||
pcre2_code_copy() function provides a way for individual threads in a
|
||||
multithreaded application to acquire a private copy of shared compiled
|
||||
code.
|
||||
ing, though it can be passed to pcre2_jit_compile() if required.
|
||||
|
||||
The pcre2_code_copy() function provides a way for individual threads in
|
||||
a multithreaded application to acquire a private copy of shared com-
|
||||
piled code. However, it does not make a copy of the character tables
|
||||
used by the compiled pattern; the new pattern code points to the same
|
||||
tables as the original code. (See "Locale Support" below for details
|
||||
of these character tables.) In many applications the same tables are
|
||||
used throughout, so this behaviour is appropriate. Nevertheless, there
|
||||
are occasions when a copy of a compiled pattern and the relevant tables
|
||||
are needed. The pcre2_code_copy_with_tables() function provides this facility.
|
||||
Copies of both the code and the tables are made, with the new code
|
||||
pointing to the new tables. The memory for the new tables is automati-
|
||||
cally freed when pcre2_code_free() is called for the new copy of the
|
||||
compiled code.
|
||||
|
||||
NOTE: When one of the matching functions is called, pointers to the
|
||||
compiled pattern and the subject string are set in the match data block
|
||||
|
@ -1155,6 +1172,12 @@ COMPILING A PATTERN
|
|||
error has occurred. The values are not defined when compilation is suc-
|
||||
cessful and pcre2_compile() returns a non-NULL value.
|
||||
|
||||
The value returned in erroroffset is an indication of where in the pat-
|
||||
tern the error occurred. It is not necessarily the furthest point in
|
||||
the pattern that was read. For example, after the error "lookbehind
|
||||
assertion is not fixed length", the error offset points to the start of
|
||||
the failing assertion.
|
||||
|
||||
The pcre2_get_error_message() function (see "Obtaining a textual error
|
||||
message" below) provides a textual message for each error code. Compi-
|
||||
lation errors have positive error codes; UTF formatting error codes are
|
||||
|
@ -1244,8 +1267,9 @@ COMPILING A PATTERN
|
|||
PCRE2_AUTO_CALLOUT
|
||||
|
||||
If this bit is set, pcre2_compile() automatically inserts callout
|
||||
items, all with number 255, before each pattern item. For discussion of
|
||||
the callout facility, see the pcre2callout documentation.
|
||||
items, all with number 255, before each pattern item, except immedi-
|
||||
ately before or after a callout in the pattern. For discussion of the
|
||||
callout facility, see the pcre2callout documentation.
|
||||
|
||||
PCRE2_CASELESS
|
||||
|
||||
|
@ -3151,7 +3175,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 17 June 2016
|
||||
Last updated: 22 November 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -3506,16 +3530,21 @@ PCRE2GREP BUFFER SIZE
|
|||
|
||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when
|
||||
it finds a match. The size of the buffer is controlled by a parameter
|
||||
whose default value is 20K. The buffer itself is three times this size,
|
||||
but because of the way it is used for holding "before" lines, the long-
|
||||
est line that is guaranteed to be processable is the parameter size.
|
||||
You can change the default parameter value by adding, for example,
|
||||
it finds a match. The starting size of the buffer is controlled by a
|
||||
parameter whose default value is 20K. The buffer itself is three times
|
||||
this size, but because of the way it is used for holding "before"
|
||||
lines, the longest line that is guaranteed to be processable is the
|
||||
parameter size. If a longer line is encountered, pcre2grep automati-
|
||||
cally expands the buffer, up to a specified maximum size, whose default
|
||||
is 1M or the starting size, whichever is the larger. You can change the
|
||||
default parameter values by adding, for example,
|
||||
|
||||
--with-pcre2grep-bufsize=50K
|
||||
--with-pcre2grep-bufsize=51200
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
|
||||
to the configure command. The caller of pcre2grep can override this
|
||||
value by using --buffer-size on the command line.
|
||||
to the configure command. The caller of pcre2grep can override these
|
||||
values by using --buffer-size and --max-buffer-size on the command
|
||||
line.
|
||||
|
||||
|
||||
PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
|
||||
|
@ -3630,6 +3659,29 @@ CODE COVERAGE REPORTING
|
|||
mentation.
|
||||
|
||||
|
||||
SUPPORT FOR FUZZERS
|
||||
|
||||
There is a special option for use by people who want to run fuzzing
|
||||
tests on PCRE2:
|
||||
|
||||
--enable-fuzz-support
|
||||
|
||||
At present this applies only to the 8-bit library. If set, it causes an
|
||||
extra library called libpcre2-fuzzsupport.a to be built, but not
|
||||
installed. This contains a single function called LLVMFuzzerTestOneIn-
|
||||
put() whose arguments are a pointer to a string and the length of the
|
||||
string. When called, this function tries to compile the string as a
|
||||
pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the
|
||||
string. Setting --enable-fuzz-support also causes a binary called
|
||||
pcre2fuzzcheck to be created. This is normally run under valgrind or
|
||||
used when PCRE2 is compiled with address sanitizing enabled. It calls
|
||||
the fuzzing function and outputs information about what it is doing. The
|
||||
input strings are specified by arguments: if an argument starts with
|
||||
"=" the rest of it is a literal input string. Otherwise, it is assumed
|
||||
to be a file name, and the contents of the file are the test string.
|
||||
|
||||
|
||||
SEE ALSO
|
||||
|
||||
pcre2api(3), pcre2-config(3).
|
||||
|
@ -3644,7 +3696,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 01 April 2016
|
||||
Last updated: 01 November 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -3689,13 +3741,22 @@ DESCRIPTION
|
|||
|
||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled,
|
||||
PCRE2 automatically inserts callouts, all with number 255, before each
|
||||
item in the pattern except for immediately before or after a callout
|
||||
item in the pattern. For example, if PCRE2_AUTO_CALLOUT is used with
|
||||
the pattern
|
||||
|
||||
A(\d{2}|--)
|
||||
A(?C3)B
|
||||
|
||||
it is processed as if it were
|
||||
|
||||
(?C255)A(?C3)B(?C255)
|
||||
|
||||
Here is a more complicated example:
|
||||
|
||||
A(\d{2}|--)
|
||||
|
||||
With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were
|
||||
|
||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||
|
||||
Notice that there is a callout before and after each parenthesis and
|
||||
|
@ -3737,10 +3798,11 @@ MISSING CALLOUTS
|
|||
No match
|
||||
|
||||
This indicates that when matching [bc] fails, there is no backtracking
|
||||
into a+ and therefore the callouts that would be taken for the back-
|
||||
tracks do not occur. You can disable the auto-possessify feature by
|
||||
passing PCRE2_NO_AUTO_POSSESS to pcre2_compile(), or starting the pat-
|
||||
tern with (*NO_AUTO_POSSESS). In this case, the output changes to this:
|
||||
into a+ (because it is being treated as a++) and therefore the callouts
|
||||
that would be taken for the backtracks do not occur. You can disable
|
||||
the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
|
||||
pcre2_compile(), or starting the pattern with (*NO_AUTO_POSSESS). In
|
||||
this case, the output changes to this:
|
||||
|
||||
--->aaaa
|
||||
+0 ^ a+
|
||||
|
@ -3859,8 +3921,8 @@ THE CALLOUT INTERFACE
|
|||
|
||||
For a numerical callout, callout_string is NULL, and callout_number
|
||||
contains the number of the callout, in the range 0-255. This is the
|
||||
number that follows (?C for manual callouts; it is 255 for automati-
|
||||
cally generated callouts.
|
||||
number that follows (?C for callouts that are part of the pattern; it is
|
||||
255 for automatically generated callouts.
|
||||
|
||||
Fields for string callouts
|
||||
|
||||
|
@ -3921,10 +3983,16 @@ THE CALLOUT INTERFACE
|
|||
the next item to be matched.
|
||||
|
||||
The next_item_length field contains the length of the next item to be
|
||||
matched in the pattern string. When the callout immediately precedes an
|
||||
alternation bar, a closing parenthesis, or the end of the pattern, the
|
||||
length is zero. When the callout precedes an opening parenthesis, the
|
||||
length is that of the entire subpattern.
|
||||
processed in the pattern string. When the callout is at the end of the
|
||||
pattern, the length is zero. When the callout precedes an opening
|
||||
parenthesis, the length includes meta characters that follow the paren-
|
||||
thesis. For example, in a callout before an assertion such as (?=ab)
|
||||
the length is 3. For an alternation bar or a closing parenthesis,
|
||||
the length is one, unless a closing parenthesis is followed by a quan-
|
||||
tifier, in which case its length is included. (This changed in release
|
||||
10.23. In earlier releases, before an opening parenthesis the length
|
||||
was that of the entire subpattern, and before an alternation bar or a
|
||||
closing parenthesis the length was zero.)
|
||||
|
||||
The pattern_position and next_item_length fields are intended to help
|
||||
in distinguishing between different automatic callouts, which all have
|
||||
|
@ -4008,8 +4076,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 23 March 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 29 September 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -4103,7 +4171,7 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
|
|||
first one that is backtracked onto acts. For example, in the pattern
|
||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure
|
||||
in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases
|
||||
it is the same as PCRE2, but there are examples where it differs.
|
||||
it is the same as PCRE2, but there are cases where it differs.
|
||||
|
||||
11. Most backtracking verbs in assertions have their normal actions.
|
||||
They are not confined to the assertion.
|
||||
|
@ -4117,18 +4185,18 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
|
|||
pattern names is not as general as Perl's. This is a consequence of the
|
||||
fact that PCRE2 works internally just with numbers, using an external
|
||||
table to translate between numbers and names. In particular, a pattern
|
||||
such as (?|(?<a>A)|(?<b)B), where the two capturing parentheses have
|
||||
such as (?|(?<a>A)|(?<b>B)), where the two capturing parentheses have
|
||||
the same number but different names, is not supported, and causes an
|
||||
error at compile time. If it were allowed, it would not be possible to
|
||||
distinguish which parentheses matched, because both names map to cap-
|
||||
turing subpattern number 1. To avoid this confusing situation, an error
|
||||
is given at compile time.
|
||||
|
||||
14. Perl recognizes comments in some places that PCRE2 does not, for
|
||||
example, between the ( and ? at the start of a subpattern. If the /x
|
||||
modifier is set, Perl allows white space between ( and ? (though cur-
|
||||
rent Perls warn that this is deprecated) but PCRE2 never does, even if
|
||||
the PCRE2_EXTENDED option is set.
|
||||
14. Perl used to recognize comments in some places that PCRE2 does not,
|
||||
for example, between the ( and ? at the start of a subpattern. If the
|
||||
/x modifier is set, Perl allowed white space between ( and ? though the
|
||||
latest Perls give an error (for a while it was just deprecated). There
|
||||
may still be some cases where Perl behaves differently.
|
||||
|
||||
15. Perl, when in warning mode, gives warnings for character classes
|
||||
such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter-
|
||||
|
@ -4152,34 +4220,39 @@ DIFFERENCES BETWEEN PCRE2 AND PERL
|
|||
different length of string. Perl requires them all to have the same
|
||||
length.
|
||||
|
||||
(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the
|
||||
(b) From PCRE2 10.23, back references to groups of fixed length are
|
||||
supported in lookbehinds, provided that there is no possibility of ref-
|
||||
erencing a non-unique number or name. Perl does not support backrefer-
|
||||
ences in lookbehinds.
|
||||
|
||||
(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the
|
||||
$ meta-character matches only at the very end of the string.
|
||||
|
||||
(c) A backslash followed by a letter with no special meaning is
|
||||
(d) A backslash followed by a letter with no special meaning is
|
||||
faulted. (Perl can be made to issue a warning.)
|
||||
|
||||
(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti-
|
||||
(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quanti-
|
||||
fiers is inverted, that is, by default they are not greedy, but if fol-
|
||||
lowed by a question mark they are.
|
||||
|
||||
(e) PCRE2_ANCHORED can be used at matching time to force a pattern to
|
||||
(f) PCRE2_ANCHORED can be used at matching time to force a pattern to
|
||||
be tried only at the first matching position in the subject string.
|
||||
|
||||
(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||
(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY,
|
||||
PCRE2_NOTEMPTY_ATSTART, and PCRE2_NO_AUTO_CAPTURE options have no Perl
|
||||
equivalents.
|
||||
|
||||
(g) The \R escape sequence can be restricted to match only CR, LF, or
|
||||
(h) The \R escape sequence can be restricted to match only CR, LF, or
|
||||
CRLF by the PCRE2_BSR_ANYCRLF option.
|
||||
|
||||
(h) The callout facility is PCRE2-specific.
|
||||
(i) The callout facility is PCRE2-specific.
|
||||
|
||||
(i) The partial matching facility is PCRE2-specific.
|
||||
(j) The partial matching facility is PCRE2-specific.
|
||||
|
||||
(j) The alternative matching function (pcre2_dfa_match() matches in a
|
||||
(k) The alternative matching function (pcre2_dfa_match()) matches in a
|
||||
different way and is not Perl-compatible.
|
||||
|
||||
(k) PCRE2 recognizes some special sequences such as (*CR) at the start
|
||||
(l) PCRE2 recognizes some special sequences such as (*CR) at the start
|
||||
of a pattern that set overall options that cannot be changed within the
|
||||
pattern.
|
||||
|
||||
|
@ -4193,8 +4266,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 15 March 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 18 October 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -4642,21 +4715,20 @@ SIZE AND OTHER LIMITATIONS
|
|||
can be no more than 65535 capturing subpatterns. There is, however, a
|
||||
limit to the depth of nesting of parenthesized subpatterns of all
|
||||
kinds. This is imposed in order to limit the amount of system stack
|
||||
used at compile time. The limit can be specified when PCRE2 is built;
|
||||
the default is 250.
|
||||
|
||||
There is a limit to the number of forward references to subsequent sub-
|
||||
patterns of around 200,000. Repeated forward references with fixed
|
||||
upper limits, for example, (?2){0,100} when subpattern number 2 is to
|
||||
the right, are included in the count. There is no limit to the number
|
||||
of backward references.
|
||||
used at compile time. The default limit can be specified when PCRE2 is
|
||||
built; the default default is 250. An application can change this limit
|
||||
by calling pcre2_set_parens_nest_limit() to set the limit in a compile
|
||||
context.
|
||||
|
||||
The maximum length of name for a named subpattern is 32 code units, and
|
||||
the maximum number of named subpatterns is 10000.
|
||||
|
||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or
|
||||
(*THEN) verb is 255 for the 8-bit library and 65535 for the 16-bit and
|
||||
32-bit libraries.
|
||||
(*THEN) verb is 255 code units for the 8-bit library and 65535 code
|
||||
units for the 16-bit and 32-bit libraries.
|
||||
|
||||
The maximum length of a string argument to a callout is the largest
|
||||
number a 32-bit unsigned integer can hold.
|
||||
|
||||
|
||||
AUTHOR
|
||||
|
@ -4668,8 +4740,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 26 October 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -5644,29 +5716,29 @@ BACKSLASH
|
|||
character (hex 40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A
|
||||
(A is 41, Z is 5A), but \c{ becomes hex 3B ({ is 7B), and \c; becomes
|
||||
hex 7B (; is 3B). If the code unit following \c has a value less than
|
||||
32 or greater than 126, a compile-time error occurs. This locks out
|
||||
non-printable ASCII characters in all modes.
|
||||
32 or greater than 126, a compile-time error occurs.
|
||||
|
||||
When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t gen-
|
||||
erate the appropriate EBCDIC code values. The \c escape is processed as
|
||||
specified for Perl in the perlebcdic document. The only characters that
|
||||
are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?.
|
||||
Any other character provokes a compile-time error. The sequence \@
|
||||
encodes character code 0; the letters (in either case) encode charac-
|
||||
ters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31
|
||||
(hex 1B to hex 1F), and \? becomes either 255 (hex FF) or 95 (hex 5F).
|
||||
Any other character provokes a compile-time error. The sequence \c@
|
||||
encodes character code 0; after \c the letters (in either case) encode
|
||||
characters 1-26 (hex 01 to hex 1A); [, \, ], ^, and _ encode characters
|
||||
27-31 (hex 1B to hex 1F), and \c? becomes either 255 (hex FF) or 95
|
||||
(hex 5F).
|
||||
|
||||
Thus, apart from \?, these escapes generate the same character code
|
||||
Thus, apart from \c?, these escapes generate the same character code
|
||||
values as they do in an ASCII environment, though the meanings of the
|
||||
values mostly differ. For example, \G always generates code value 7,
|
||||
values mostly differ. For example, \cG always generates code value 7,
|
||||
which is BEL in ASCII but DEL in EBCDIC.
|
||||
|
||||
The sequence \? generates DEL (127, hex 7F) in an ASCII environment,
|
||||
The sequence \c? generates DEL (127, hex 7F) in an ASCII environment,
|
||||
but because 127 is not a control character in EBCDIC, Perl makes it
|
||||
generate the APC character. Unfortunately, there are several variants
|
||||
of EBCDIC. In most of them the APC character has the value 255 (hex
|
||||
FF), but in the one Perl calls POSIX-BC its value is 95 (hex 5F). If
|
||||
certain other characters have POSIX-BC values, PCRE2 makes \? generate
|
||||
certain other characters have POSIX-BC values, PCRE2 makes \c? generate
|
||||
95; otherwise it generates 255.
|
||||
|
||||
After \0 up to two further octal digits are read. If there are fewer
|
||||
|
@ -5776,10 +5848,10 @@ BACKSLASH
|
|||
|
||||
Absolute and relative back references
|
||||
|
||||
The sequence \g followed by an unsigned or a negative number, option-
|
||||
ally enclosed in braces, is an absolute or relative back reference. A
|
||||
named back reference can be coded as \g{name}. Back references are dis-
|
||||
cussed later, following the discussion of parenthesized subpatterns.
|
||||
The sequence \g followed by a signed or unsigned number, optionally
|
||||
enclosed in braces, is an absolute or relative back reference. A named
|
||||
back reference can be coded as \g{name}. Back references are discussed
|
||||
later, following the discussion of parenthesized subpatterns.
|
||||
|
||||
Absolute and relative subroutine calls
|
||||
|
||||
|
@ -6404,6 +6476,18 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|||
PCRE2_MULTILINE options is used. A class such as [^a] always matches
|
||||
one of these characters.
|
||||
|
||||
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V,
|
||||
\w, and \W may appear in a character class, and add the characters that
|
||||
they match to the class. For example, [\dABCDEF] matches any hexadeci-
|
||||
mal digit. In UTF modes, the PCRE2_UCP option affects the meanings of
|
||||
\d, \s, \w and their upper case partners, just as it does when they
|
||||
appear outside a character class, as described in the section entitled
|
||||
"Generic character types" above. The escape sequence \b has a different
|
||||
meaning inside a character class; it matches the backspace character.
|
||||
The sequences \B, \N, \R, and \X are not special inside a character
|
||||
class. Like any other unrecognized escape sequences, they cause an
|
||||
error.
|
||||
|
||||
The minus (hyphen) character can be used to specify a range of charac-
|
||||
ters in a character class. For example, [d-m] matches any letter
|
||||
between d and m, inclusive. If a minus character is required in a
|
||||
|
@ -6413,6 +6497,11 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|||
example, [b-d-z] matches letters in the range b to d, a hyphen charac-
|
||||
ter, or z.
|
||||
|
||||
Perl treats a hyphen as a literal if it appears before a POSIX class
|
||||
(see below) or a character type escape such as \d, but gives a warn-
|
||||
ing in its warning mode, as this is most likely a user error. As PCRE2
|
||||
has no facility for warning, an error is given in these cases.
|
||||
|
||||
It is not possible to have the literal character "]" as the end charac-
|
||||
ter of a range. A pattern such as [W-]46] is interpreted as a class of
|
||||
two characters ("W" and "-") followed by a literal string "46]", so it
|
||||
|
@ -6422,11 +6511,6 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|||
The octal or hexadecimal representation of "]" can also be used to end
|
||||
a range.
|
||||
|
||||
An error is generated if a POSIX character class (see below) or an
|
||||
escape sequence other than one that defines a single character appears
|
||||
at a point where a range ending character is expected. For example,
|
||||
[z-\xff] is valid, but [A-\d] and [A-[:digit:]] are not.
|
||||
|
||||
Ranges normally include all code points between the start and end char-
|
||||
acters, inclusive. They can also be used for code points specified
|
||||
numerically, for example [\000-\037]. Ranges can include any characters
|
||||
|
@ -6446,18 +6530,6 @@ SQUARE BRACKETS AND CHARACTER CLASSES
|
|||
character tables for a French locale are in use, [\xc8-\xcb] matches
|
||||
accented E characters in both cases.
|
||||
|
||||
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v, \V,
|
||||
\w, and \W may appear in a character class, and add the characters that
|
||||
they match to the class. For example, [\dABCDEF] matches any hexadeci-
|
||||
mal digit. In UTF modes, the PCRE2_UCP option affects the meanings of
|
||||
\d, \s, \w and their upper case partners, just as it does when they
|
||||
appear outside a character class, as described in the section entitled
|
||||
"Generic character types" above. The escape sequence \b has a different
|
||||
meaning inside a character class; it matches the backspace character.
|
||||
The sequences \B, \N, \R, and \X are not special inside a character
|
||||
class. Like any other unrecognized escape sequences, they cause an
|
||||
error.
|
||||
|
||||
A circumflex can conveniently be used with the upper case character
|
||||
types to specify a more restricted set of characters than the matching
|
||||
lower case type. For example, the class [^\W_] matches any letter or
|
||||
|
@ -6618,14 +6690,9 @@ INTERNAL OPTION SETTING
|
|||
|
||||
When one of these option changes occurs at top level (that is, not
|
||||
inside subpattern parentheses), the change applies to the remainder of
|
||||
the pattern that follows. If the change is placed right at the start of
|
||||
a pattern, PCRE2 extracts it into the global options (and it will
|
||||
therefore show up in data extracted by the pcre2_pattern_info() func-
|
||||
tion).
|
||||
|
||||
An option change within a subpattern (see below for a description of
|
||||
subpatterns) affects only that part of the subpattern that follows it,
|
||||
so
|
||||
the pattern that follows. An option change within a subpattern (see
|
||||
below for a description of subpatterns) affects only that part of the
|
||||
subpattern that follows it, so
|
||||
|
||||
(a(?i)b)c
|
||||
|
||||
|
@ -7140,8 +7207,8 @@ BACK REFERENCES
|
|||
|
||||
Another way of avoiding the ambiguity inherent in the use of digits
|
||||
following a backslash is to use the \g escape sequence. This escape
|
||||
must be followed by an unsigned number or a negative number, optionally
|
||||
enclosed in braces. These examples are all identical:
|
||||
must be followed by a signed or unsigned number, optionally enclosed in
|
||||
braces. These examples are all identical:
|
||||
|
||||
(ring), \1
|
||||
(ring), \g1
|
||||
|
@ -7149,7 +7216,7 @@ BACK REFERENCES
|
|||
|
||||
An unsigned number specifies an absolute reference without the ambigu-
|
||||
ity that is present in the older syntax. It is also useful when literal
|
||||
digits follow the reference. A negative number is a relative reference.
|
||||
digits follow the reference. A signed number is a relative reference.
|
||||
Consider this example:
|
||||
|
||||
(abc(def)ghi)\g{-1}
|
||||
|
@ -7161,6 +7228,10 @@ BACK REFERENCES
|
|||
are created by joining together fragments that contain references
|
||||
within themselves.
|
||||
|
||||
The sequence \g{+1} is a reference to the next capturing subpattern.
|
||||
This kind of forward reference can be useful in patterns that repeat.
|
||||
Perl does not support the use of + in this way.
|
||||
|
||||
A back reference matches whatever actually matched the capturing sub-
|
||||
pattern in the current subject string, rather than anything matching
|
||||
the subpattern itself (see "Subpatterns as subroutines" below for a way
|
||||
|
@ -7252,6 +7323,13 @@ ASSERTIONS
|
|||
assertions. (Perl sometimes, but not always, does do capturing in nega-
|
||||
tive assertions.)
|
||||
|
||||
WARNING: If a positive assertion containing one or more capturing sub-
|
||||
patterns succeeds, but failure to match later in the pattern causes
|
||||
backtracking over this assertion, the captures within the assertion are
|
||||
reset only if no higher numbered captures are already set. This is,
|
||||
unfortunately, a fundamental limitation of the current implementation;
|
||||
it may get removed in a future reworking.
|
||||
|
||||
For compatibility with Perl, most assertion subpatterns may be
|
||||
repeated; though it makes no sense to assert the same thing several
|
||||
times, the side effect of capturing parentheses may occasionally be
|
||||
|
@ -7340,15 +7418,27 @@ ASSERTIONS
|
|||
then try to match. If there are insufficient characters before the cur-
|
||||
rent position, the assertion fails.
|
||||
|
||||
In a UTF mode, PCRE2 does not allow the \C escape (which matches a sin-
|
||||
gle code unit even in a UTF mode) to appear in lookbehind assertions,
|
||||
because it makes it impossible to calculate the length of the lookbe-
|
||||
hind. The \X and \R escapes, which can match different numbers of code
|
||||
units, are also not permitted.
|
||||
In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which
|
||||
matches a single code unit even in a UTF mode) to appear in lookbehind
|
||||
assertions, because it makes it impossible to calculate the length of
|
||||
the lookbehind. The \X and \R escapes, which can match different num-
|
||||
bers of code units, are never permitted in lookbehinds.
|
||||
|
||||
"Subroutine" calls (see below) such as (?2) or (?&X) are permitted in
|
||||
lookbehinds, as long as the subpattern matches a fixed-length string.
|
||||
Recursion, however, is not supported.
|
||||
However, recursion, that is, a "subroutine" call into a group that is
|
||||
already active, is not supported.
|
||||
|
||||
Perl does not support back references in lookbehinds. PCRE2 does sup-
|
||||
port them, but only if certain conditions are met. The
|
||||
PCRE2_MATCH_UNSET_BACKREF option must not be set, there must be no use
|
||||
of (?| in the pattern (it creates duplicate subpattern numbers), and if
|
||||
the back reference is by name, the name must be unique. Of course, the
|
||||
referenced subpattern must itself be of fixed length. The following
|
||||
pattern matches words containing at least two characters that begin and
|
||||
end with the same character:
|
||||
|
||||
\b(\w)\w++(?<=\1)
|
||||
|
||||
Possessive quantifiers can be used in conjunction with lookbehind
|
||||
assertions to specify efficient matching of fixed-length strings at the
|
||||
|
@ -7482,7 +7572,9 @@ CONDITIONAL SUBPATTERNS
|
|||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a
|
||||
used subpattern by name. For compatibility with earlier versions of
|
||||
PCRE1, which had this facility before Perl, the syntax (?(name)...) is
|
||||
also recognized.
|
||||
also recognized. Note, however, that undelimited names consisting of
|
||||
the letter R followed by digits are ambiguous (see the following sec-
|
||||
tion).
|
||||
|
||||
Rewriting the above example to use a named subpattern gives this:
|
||||
|
||||
|
@ -7494,32 +7586,51 @@ CONDITIONAL SUBPATTERNS
|
|||
|
||||
Checking for pattern recursion
|
||||
|
||||
If the condition is the string (R), and there is no subpattern with the
|
||||
name R, the condition is true if a recursive call to the whole pattern
|
||||
or any subpattern has been made. If digits or a name preceded by amper-
|
||||
sand follow the letter R, for example:
|
||||
"Recursion" in this sense refers to any subroutine-like call from one
|
||||
part of the pattern to another, whether or not it is actually recur-
|
||||
sive. See the sections entitled "Recursive patterns" and "Subpatterns
|
||||
as subroutines" below for details of recursion and subpattern calls.
|
||||
|
||||
(?(R3)...) or (?(R&name)...)
|
||||
If a condition is the string (R), and there is no subpattern with the
|
||||
name R, the condition is true if matching is currently in a recursion
|
||||
or subroutine call to the whole pattern or any subpattern. If digits
|
||||
follow the letter R, and there is no subpattern with that name, the
|
||||
condition is true if the most recent call is into a subpattern with the
|
||||
given number, which must exist somewhere in the overall pattern. This
|
||||
is a contrived example that is equivalent to a+b:
|
||||
|
||||
((?(R1)a+|(?1)b))
|
||||
|
||||
However, in both cases, if there is a subpattern with a matching name,
|
||||
the condition tests for its being set, as described in the section
|
||||
above, instead of testing for recursion. For example, creating a group
|
||||
with the name R1 by adding (?<R1>) to the above pattern completely
|
||||
changes its meaning.
|
||||
|
||||
If a name preceded by ampersand follows the letter R, for example:
|
||||
|
||||
(?(R&name)...)
|
||||
|
||||
the condition is true if the most recent recursion is into a subpattern
|
||||
whose number or name is given. This condition does not check the entire
|
||||
recursion stack. If the name used in a condition of this kind is a
|
||||
of that name (which must exist within the pattern).
|
||||
|
||||
This condition does not check the entire recursion stack. It tests only
|
||||
the current level. If the name used in a condition of this kind is a
|
||||
duplicate, the test is applied to all subpatterns of the same name, and
|
||||
is true if any one of them is the most recent recursion.
|
||||
|
||||
At "top level", all these recursion test conditions are false. The
|
||||
syntax for recursive patterns is described below.
|
||||
At "top level", all these recursion test conditions are false.
|
||||
|
||||
Defining subpatterns for use by reference only
|
||||
|
||||
If the condition is the string (DEFINE), and there is no subpattern
|
||||
with the name DEFINE, the condition is always false. In this case,
|
||||
there may be only one alternative in the subpattern. It is always
|
||||
skipped if control reaches this point in the pattern; the idea of
|
||||
DEFINE is that it can be used to define subroutines that can be refer-
|
||||
enced from elsewhere. (The use of subroutines is described below.) For
|
||||
example, a pattern to match an IPv4 address such as "192.168.23.245"
|
||||
could be written like this (ignore white space and line breaks):
|
||||
If the condition is the string (DEFINE), the condition is always false,
|
||||
even if there is a group with the name DEFINE. In this case, there may
|
||||
be only one alternative in the subpattern. It is always skipped if con-
|
||||
trol reaches this point in the pattern; the idea of DEFINE is that it
|
||||
can be used to define subroutines that can be referenced from else-
|
||||
where. (The use of subroutines is described below.) For example, a pat-
|
||||
tern to match an IPv4 address such as "192.168.23.245" could be written
|
||||
like this (ignore white space and line breaks):
|
||||
|
||||
(?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
|
||||
\b (?&byte) (\.(?&byte)){3} \b
|
||||
|
@ -7971,13 +8082,22 @@ BACKTRACKING CONTROL
|
|||
By default, for compatibility with Perl, a name is any sequence of
|
||||
characters that does not include a closing parenthesis. The name is not
|
||||
processed in any way, and it is not possible to include a closing
|
||||
parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES option is
|
||||
set, normal backslash processing is applied to verb names and only an
|
||||
unescaped closing parenthesis terminates the name. A closing parenthe-
|
||||
sis can be included in a name either as \) or between \Q and \E. If the
|
||||
PCRE2_EXTENDED option is set, unescaped whitespace in verb names is
|
||||
skipped and #-comments are recognized, exactly as in the rest of the
|
||||
pattern.
|
||||
parenthesis in the name. This can be changed by setting the
|
||||
PCRE2_ALT_VERBNAMES option, but the result is no longer Perl-compati-
|
||||
ble.
|
||||
|
||||
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to
|
||||
verb names and only an unescaped closing parenthesis terminates the
|
||||
name. However, the only backslash items that are permitted are \Q, \E,
|
||||
and sequences such as \x{100} that define character code points. Char-
|
||||
acter type escapes such as \d are faulted.
|
||||
|
||||
A closing parenthesis can be included in a name either as \) or between
|
||||
\Q and \E. In addition to backslash processing, if the PCRE2_EXTENDED
|
||||
option is also set, unescaped whitespace in verb names is skipped, and
|
||||
#-comments are recognized, exactly as in the rest of the pattern.
|
||||
PCRE2_EXTENDED does not affect verb names unless PCRE2_ALT_VERBNAMES is
|
||||
also set.
|
||||
|
||||
The maximum length of a name is 255 in the 8-bit library and 65535 in
|
||||
the 16-bit and 32-bit libraries. If the name is empty, that is, if the
|
||||
|
@ -8367,7 +8487,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 20 June 2016
|
||||
Last updated: 23 October 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -9589,6 +9709,9 @@ BACKREFERENCES
|
|||
\n reference by number (can be ambiguous)
|
||||
\gn reference by number
|
||||
\g{n} reference by number
|
||||
\g+n relative reference by number (PCRE2 extension)
|
||||
\g-n relative reference by number
|
||||
\g{+n} relative reference by number (PCRE2 extension)
|
||||
\g{-n} relative reference by number
|
||||
\k<name> reference by name (Perl)
|
||||
\k'name' reference by name (Perl)
|
||||
|
@ -9625,14 +9748,18 @@ CONDITIONAL PATTERNS
|
|||
(?(-n) relative reference condition
|
||||
(?(<name>) named reference condition (Perl)
|
||||
(?('name') named reference condition (Perl)
|
||||
(?(name) named reference condition (PCRE2)
|
||||
(?(name) named reference condition (PCRE2, deprecated)
|
||||
(?(R) overall recursion condition
|
||||
(?(Rn) specific group recursion condition
|
||||
(?(R&name) specific recursion condition
|
||||
(?(Rn) specific numbered group recursion condition
|
||||
(?(R&name) specific named group recursion condition
|
||||
(?(DEFINE) define subpattern for reference
|
||||
(?(VERSION[>]=n.m) test PCRE2 version
|
||||
(?(assert) assertion condition
|
||||
|
||||
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||
conditions or recursion tests. Such a condition is interpreted as a
|
||||
reference condition if the relevant named group exists.
|
||||
|
||||
|
||||
BACKTRACKING CONTROL
|
||||
|
||||
|
@ -9684,8 +9811,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 16 October 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
Last updated: 28 September 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_CODE_COPY 3 "26 February 2016" "PCRE2 10.22"
|
||||
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -16,8 +16,9 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
|||
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||
any memory used by the JIT compiler. Without a subsequent call to
|
||||
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The
|
||||
yield of the function is NULL if \fIcode\fP is NULL or if sufficient memory
|
||||
cannot be obtained.
|
||||
pointer to the character tables is copied, not the tables themselves (see
|
||||
\fBpcre2_code_copy_with_tables()\fP). The yield of the function is NULL if
|
||||
\fIcode\fP is NULL or if sufficient memory cannot be obtained.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
.TH PCRE2_CODE_COPY_WITH_TABLES 3 "22 November 2016" "PCRE2 10.23"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||
any memory used by the JIT compiler. Without a subsequent call to
|
||||
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching.
|
||||
Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also
|
||||
made, with the new code pointing to it. This memory will be automatically freed
|
||||
when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if
|
||||
\fIcode\fP is NULL or if sufficient memory cannot be obtained.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "30 September 2016" "PCRE2 10.23"
|
||||
.TH PCRE2API 3 "22 November 2016" "PCRE2 10.23"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -235,6 +235,8 @@ document for an overview of all the PCRE2 documentation.
|
|||
.nf
|
||||
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
||||
.sp
|
||||
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||
.sp
|
||||
.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP,
|
||||
.B " PCRE2_SIZE \fIbufflen\fP);"
|
||||
.sp
|
||||
|
@ -509,8 +511,9 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
|||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||
required. JIT compilation updates a pointer within the compiled code block, so
|
||||
a thread must gain unique write access to the pointer before calling
|
||||
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP can be used
|
||||
to obtain a private copy of the compiled code.
|
||||
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or
|
||||
\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the
|
||||
compiled code.
|
||||
.
|
||||
.
|
||||
.SS "Context blocks"
|
||||
|
@ -1027,6 +1030,8 @@ zero.
|
|||
.B void pcre2_code_free(pcre2_code *\fIcode\fP);
|
||||
.sp
|
||||
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
||||
.sp
|
||||
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||
.fi
|
||||
.P
|
||||
The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
|
||||
|
@ -1049,9 +1054,24 @@ below),
|
|||
.\"
|
||||
the JIT information cannot be copied (because it is position-dependent).
|
||||
The new copy can initially be used only for non-JIT matching, though it can be
|
||||
passed to \fBpcre2_jit_compile()\fP if required. The \fBpcre2_code_copy()\fP
|
||||
function provides a way for individual threads in a multithreaded application
|
||||
to acquire a private copy of shared compiled code.
|
||||
passed to \fBpcre2_jit_compile()\fP if required.
|
||||
.P
|
||||
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
|
||||
multithreaded application to acquire a private copy of shared compiled code.
|
||||
However, it does not make a copy of the character tables used by the compiled
|
||||
pattern; the new pattern code points to the same tables as the original code.
|
||||
(See
|
||||
.\" HTML <a href="#localesupport">
|
||||
.\" </a>
|
||||
"Locale Support"
|
||||
.\"
|
||||
below for details of these character tables.) In many applications the same
|
||||
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||
are needed. The \fBpcre2_code_copy_with_tables()\fP function provides this facility.
|
||||
Copies of both the code and the tables are made, with the new code pointing to
|
||||
the new tables. The memory for the new tables is automatically freed when
|
||||
\fBpcre2_code_free()\fP is called for the new copy of the compiled code.
|
||||
.P
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that they can
|
||||
|
@ -3299,6 +3319,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 September 2016
|
||||
Last updated: 22 November 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -51,11 +51,20 @@ DESCRIPTION
|
|||
boundary is controlled by the -N (--newline) option.
|
||||
|
||||
The amount of memory used for buffering files that are being scanned is
|
||||
controlled by a parameter that can be set by the --buffer-size option.
|
||||
The default value for this parameter is specified when pcre2grep is
|
||||
built, with the default default being 20K. A block of memory three
|
||||
times this size is used (to allow for buffering "before" and "after"
|
||||
lines). An error occurs if a line overflows the buffer.
|
||||
controlled by parameters that can be set by the --buffer-size and
|
||||
--max-buffer-size options. The first of these sets the size of buffer
|
||||
that is obtained at the start of processing. If an input file contains
|
||||
very long lines, a larger buffer may be needed; this is handled by
|
||||
automatically extending the buffer, up to the limit specified by --max-
|
||||
buffer-size. The default values for these parameters are specified when
|
||||
pcre2grep is built, with the default defaults being 20K and 1M respec-
|
||||
tively. An error occurs if a line is too long and the buffer can no
|
||||
longer be expanded.
|
||||
|
||||
The block of memory that is actually used is three times the "buffer
|
||||
size", to allow for buffering "before" and "after" lines. If the buffer
|
||||
size is too small, fewer than requested "before" and "after" lines may
|
||||
be output.
|
||||
|
||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the
|
||||
greater. BUFSIZ is defined in <stdio.h>. When there is more than one
|
||||
|
@ -126,28 +135,31 @@ OPTIONS
|
|||
names that start with hyphens.
|
||||
|
||||
-A number, --after-context=number
|
||||
Output number lines of context after each matching line. If
|
||||
file names and/or line numbers are being output, a hyphen
|
||||
separator is used instead of a colon for the context lines. A
|
||||
line containing "--" is output between each group of lines,
|
||||
unless they are in fact contiguous in the input file. The
|
||||
value of number is expected to be relatively small. However,
|
||||
pcre2grep guarantees to have up to 8K of following text
|
||||
available for context output.
|
||||
Output up to number lines of context after each matching
|
||||
line. Fewer lines are output if the next match or the end of
|
||||
the file is reached, or if the processing buffer size has
|
||||
been set too small. If file names and/or line numbers are
|
||||
being output, a hyphen separator is used instead of a colon
|
||||
for the context lines. A line containing "--" is output
|
||||
between each group of lines, unless they are in fact contigu-
|
||||
ous in the input file. The value of number is expected to be
|
||||
relatively small. When -c is used, -A is ignored.
|
||||
|
||||
-a, --text
|
||||
Treat binary files as text. This is equivalent to --binary-
|
||||
files=text.
|
||||
|
||||
-B number, --before-context=number
|
||||
Output number lines of context before each matching line. If
|
||||
file names and/or line numbers are being output, a hyphen
|
||||
separator is used instead of a colon for the context lines. A
|
||||
line containing "--" is output between each group of lines,
|
||||
unless they are in fact contiguous in the input file. The
|
||||
value of number is expected to be relatively small. However,
|
||||
pcre2grep guarantees to have up to 8K of preceding text
|
||||
available for context output.
|
||||
Output up to number lines of context before each matching
|
||||
line. Fewer lines are output if the previous match or the
|
||||
start of the file is within number lines, or if the process-
|
||||
ing buffer size has been set too small. If file names and/or
|
||||
line numbers are being output, a hyphen separator is used
|
||||
instead of a colon for the context lines. A line containing
|
||||
"--" is output between each group of lines, unless they are
|
||||
in fact contiguous in the input file. The value of number is
|
||||
expected to be relatively small. When -c is used, -B is
|
||||
ignored.
|
||||
|
||||
--binary-files=word
|
||||
Specify how binary files are to be processed. If the word is
|
||||
|
@ -164,8 +176,9 @@ OPTIONS
|
|||
any output or affecting the return code.
|
||||
|
||||
--buffer-size=number
|
||||
Set the parameter that controls how much memory is used for
|
||||
buffering files that are being scanned.
|
||||
Set the parameter that controls how much memory is obtained
|
||||
at the start of processing for buffering files that are being
|
||||
scanned. See also --max-buffer-size below.
|
||||
|
||||
-C number, --context=number
|
||||
Output number lines of context both before and after each
|
||||
|
@ -174,19 +187,21 @@ OPTIONS
|
|||
|
||||
-c, --count
|
||||
Do not output lines from the files that are being scanned;
|
||||
instead output the number of matches (or non-matches if -v is
|
||||
used) that would otherwise have caused lines to be shown. By
|
||||
default, this count is the same as the number of suppressed
|
||||
lines, but if the -M (multiline) option is used (without -v),
|
||||
there may be more suppressed lines than the number of
|
||||
matches.
|
||||
instead output the number of lines that would have been
|
||||
shown, either because they matched, or, if -v is set, because
|
||||
they failed to match. By default, this count is exactly the
|
||||
same as the number of lines that would have been output, but
|
||||
if the -M (multiline) option is used (without -v), there may
|
||||
be more suppressed lines than the count (that is, the number
|
||||
of matches).
|
||||
|
||||
If no lines are selected, the number zero is output. If sev-
|
||||
eral files are being scanned, a count is output for each
|
||||
of them. However, if the --files-with-matches option is also
|
||||
used, only those files whose counts are greater than zero are
|
||||
listed. When -c is used, the -A, -B, and -C options are
|
||||
ignored.
|
||||
of them and the -t option can be used to cause a total to be
|
||||
output at the end. However, if the --files-with-matches
|
||||
option is also used, only those files whose counts are
|
||||
greater than zero are listed. When -c is used, the -A, -B,
|
||||
and -C options are ignored.
|
||||
|
||||
--colour, --color
|
||||
If this option is given without any data, it is equivalent to
|
||||
|
@ -205,13 +220,14 @@ OPTIONS
|
|||
them all.
|
||||
|
||||
The colour that is used can be specified by setting the envi-
|
||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The
|
||||
value of this variable should be a string of two numbers,
|
||||
separated by a semicolon. They are copied directly into the
|
||||
control string for setting colour on a terminal, so it is
|
||||
your responsibility to ensure that they make sense. If nei-
|
||||
ther of the environment variables is set, the default is
|
||||
"1;31", which gives red.
|
||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If nei-
|
||||
ther of these is set, pcre2grep looks for GREP_COLOUR or
|
||||
GREP_COLOR. The value of the variable should be a string of
|
||||
two numbers, separated by a semicolon. They are copied
|
||||
directly into the control string for setting colour on a ter-
|
||||
minal, so it is your responsibility to ensure that they make
|
||||
sense. If neither of the environment variables is set, the
|
||||
default is "1;31", which gives red.
|
||||
|
||||
-D action, --devices=action
|
||||
If an input path is not a regular file or a directory,
|
||||
|
@ -299,12 +315,12 @@ OPTIONS
|
|||
Read patterns from the file, one per line, and match them
|
||||
against each line of input. What constitutes a newline when
|
||||
reading the file is the operating system's default. The
|
||||
--newline option has no effect on this option. Trailing white
|
||||
space is removed from each line, and blank lines are ignored.
|
||||
An empty file contains no patterns and therefore matches
|
||||
nothing. See also the comments about multiple patterns versus
|
||||
a single pattern with alternatives in the description of -e
|
||||
above.
|
||||
--newline option has no effect on this option. Trailing
|
||||
white space is removed from each line, and blank lines are
|
||||
ignored. An empty file contains no patterns and therefore
|
||||
matches nothing. See also the comments about multiple pat-
|
||||
terns versus a single pattern with alternatives in the
|
||||
description of -e above.
|
||||
|
||||
If this option is given more than once, all the specified
|
||||
files are read. A data line is output if any of the patterns
|
||||
|
@ -482,25 +498,27 @@ OPTIONS
|
|||
tings are specified when the PCRE2 library is compiled, with
|
||||
the default default being 10 million.
|
||||
|
||||
--max-buffer-size=number
|
||||
This limits the expansion of the processing buffer, whose
|
||||
initial size can be set by --buffer-size. The maximum buffer
|
||||
size is silently forced to be no smaller than the starting
|
||||
buffer size.
|
||||
|
||||
-M, --multiline
|
||||
Allow patterns to match more than one line. When this option
|
||||
is given, patterns may usefully contain literal newline char-
|
||||
acters and internal occurrences of ^ and $ characters. The
|
||||
output for a successful match may consist of more than one
|
||||
line. The first is the line in which the match started, and
|
||||
the last is the line in which the match ended. If the matched
|
||||
string ends with a newline sequence the output ends at the
|
||||
end of that line.
|
||||
|
||||
When this option is set, the PCRE2 library is called in "mul-
|
||||
tiline" mode. This allows a matched string to extend past the
|
||||
end of a line and continue on one or more subsequent lines.
|
||||
However, pcre2grep still processes the input line by line.
|
||||
Once a match has been handled, scanning restarts at the
|
||||
beginning of the next line, just as it does when -M is not
|
||||
present. This means that it is possible for the second or
|
||||
subsequent lines in a multiline match to be output again as
|
||||
part of another match.
|
||||
is set, the PCRE2 library is called in "multiline" mode. This
|
||||
allows a matched string to extend past the end of a line and
|
||||
continue on one or more subsequent lines. Patterns used with
|
||||
-M may usefully contain literal newline characters and inter-
|
||||
nal occurrences of ^ and $ characters. The output for a suc-
|
||||
cessful match may consist of more than one line. The first
|
||||
line is the line in which the match started, and the last
|
||||
line is the line in which the match ended. If the matched
|
||||
string ends with a newline sequence, the output ends at the
|
||||
end of that line. If -v is set, none of the lines in a
|
||||
multi-line match are output. Once a match has been handled,
|
||||
scanning restarts at the beginning of the line after the one
|
||||
in which the match ended.
|
||||
|
||||
The newline sequence that separates multiple lines must be
|
||||
matched as part of the pattern. For example, to find the
|
||||
|
@ -517,13 +535,9 @@ OPTIONS
|
|||
|
||||
There is a limit to the number of lines that can be matched,
|
||||
imposed by the way that pcre2grep buffers the input file as
|
||||
it scans it. However, pcre2grep ensures that at least 8K
|
||||
characters or the rest of the file (whichever is the shorter)
|
||||
are available for forward matching, and similarly the previ-
|
||||
ous 8K characters (or all the previous characters, if fewer
|
||||
than 8K) are guaranteed to be available for lookbehind asser-
|
||||
tions. The -M option does not work when input is read line by
|
||||
line (see --line-buffered).
|
||||
it scans it. With a sufficiently large processing buffer,
|
||||
this should not be a problem, but the -M option does not work
|
||||
when input is read line by line (see --line-buffered).
|
||||
|
||||
-N newline-type, --newline=newline-type
|
||||
The PCRE2 library supports five different conventions for
|
||||
|
@ -570,14 +584,15 @@ OPTIONS
|
|||
Show only the part of the line that matched a pattern instead
|
||||
of the whole line. In this mode, no context is shown. That
|
||||
is, the -A, -B, and -C options are ignored. If there is more
|
||||
than one match in a line, each of them is shown separately.
|
||||
If -o is combined with -v (invert the sense of the match to
|
||||
find non-matching lines), no output is generated, but the
|
||||
return code is set appropriately. If the matched portion of
|
||||
the line is empty, nothing is output unless the file name or
|
||||
line number are being printed, in which case they are shown
|
||||
on an otherwise empty line. This option is mutually exclusive
|
||||
with --file-offsets and --line-offsets.
|
||||
than one match in a line, each of them is shown separately,
|
||||
on a separate line of output. If -o is combined with -v
|
||||
(invert the sense of the match to find non-matching lines),
|
||||
no output is generated, but the return code is set appropri-
|
||||
ately. If the matched portion of the line is empty, nothing
|
||||
is output unless the file name or line number are being
|
||||
printed, in which case they are shown on an otherwise empty
|
||||
line. This option is mutually exclusive with --file-offsets
|
||||
and --line-offsets.
|
||||
|
||||
-onumber, --only-matching=number
|
||||
Show only the part of the line that matched the capturing
|
||||
|
@ -593,10 +608,11 @@ OPTIONS
|
|||
put.
|
||||
|
||||
If this option is given multiple times, multiple substrings
|
||||
are output, in the order the options are given. For example,
|
||||
-o3 -o1 -o3 causes the substrings matched by capturing paren-
|
||||
theses 3 and 1 and then 3 again to be output. By default,
|
||||
there is no separator (but see the next option).
|
||||
are output for each match, in the order the options are
|
||||
given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||
the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator
|
||||
(but see the next option).
|
||||
|
||||
--om-separator=text
|
||||
Specify a separating string for multiple occurrences of -o.
|
||||
|
@ -624,6 +640,19 @@ OPTIONS
|
|||
files. Such files are quietly skipped. However, the return
|
||||
code is still 2, even if matches were found in other files.
|
||||
|
||||
-t, --total-count
|
||||
This option is useful when scanning more than one file. If
|
||||
used on its own, -t suppresses all output except for a grand
|
||||
total number of matching lines (or non-matching lines if -v
|
||||
is used) in all the files. If -t is used with -c, a grand
|
||||
total is output except when the previous output is just one
|
||||
line. In other words, it is not output when just one file's
|
||||
count is listed. If file names are being output, the grand
|
||||
total is preceded by "TOTAL:". Otherwise, it appears as just
|
||||
another number. The -t option is ignored when used with -L
|
||||
(list files without matches), because the grand total would
|
||||
always be zero.
|
||||
|
||||
-u, --utf-8
|
||||
Operate in UTF-8 mode. This option is available only if PCRE2
|
||||
has been compiled with UTF-8 support. All patterns (including
|
||||
|
@ -650,8 +679,9 @@ OPTIONS
|
|||
-x, --line-regex, --line-regexp
|
||||
Force the patterns to be anchored (each must start matching
|
||||
at the beginning of a line) and in addition, require them to
|
||||
match entire lines. This is equivalent to having ^ and $
|
||||
characters at the start and end of each alternative top-level
|
||||
match entire lines. In multiline mode the match may be more
|
||||
than one line. This is equivalent to having \A and \Z charac-
|
||||
ters at the start and end of each alternative top-level
|
||||
branch in every pattern. This option applies only to the pat-
|
||||
terns that are matched against the contents of files; it does
|
||||
not apply to patterns specified by any of the --include or
|
||||
|
@ -822,5 +852,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 19 June 2016
|
||||
Last updated: 31 October 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -558,6 +558,7 @@ PATTERN MODIFIERS
|
|||
pushcopy push a copy onto the stack
|
||||
stackguard=<number> test the stackguard feature
|
||||
tables=[0|1|2] select internal tables
|
||||
use_length do not zero-terminate the pattern
|
||||
utf8_input treat input as UTF-8
|
||||
|
||||
The effects of these modifiers are described in the following sections.
|
||||
|
@ -631,6 +632,16 @@ PATTERN MODIFIERS
|
|||
testing that pcre2_compile() behaves correctly in this case (it uses
|
||||
default values).
|
||||
|
||||
Specifying the pattern's length
|
||||
|
||||
By default, patterns are passed to the compiling functions as zero-ter-
|
||||
minated strings. When using the POSIX wrapper API, there is no other
|
||||
option. However, when using PCRE2's native API, patterns can be passed
|
||||
by length instead of being zero-terminated. The use_length modifier
|
||||
causes this to happen. Using a length happens automatically (whether
|
||||
or not use_length is set) when hex is set, because patterns specified
|
||||
in hexadecimal may contain binary zeros.
|
||||
|
||||
Specifying pattern characters in hexadecimal
|
||||
|
||||
The hex modifier specifies that the characters of the pattern, except
|
||||
|
@ -652,10 +663,11 @@ PATTERN MODIFIERS
|
|||
ing the delimiter within a substring. The hex and expand modifiers are
|
||||
mutually exclusive.
|
||||
|
||||
By default, pcre2test passes patterns as zero-terminated strings to
|
||||
pcre2_compile(), giving the length as PCRE2_ZERO_TERMINATED. However,
|
||||
for patterns specified with the hex modifier, the actual length of the
|
||||
pattern is passed.
|
||||
The POSIX API cannot be used with patterns specified in hexadecimal
|
||||
because they may contain binary zeros, which conflicts with regcomp()'s
|
||||
requirement for a zero-terminated string. Such patterns are always
|
||||
passed to pcre2_compile() as a string with a length, not as zero-termi-
|
||||
nated.
|
||||
|
||||
Specifying wide characters in 16-bit and 32-bit modes
|
||||
|
||||
|
@ -1589,5 +1601,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 02 August 2016
|
||||
Last updated: 04 November 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
|||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy(const pcre2_code *);
|
||||
*pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
|||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
|
|
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
|||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||
pcre2_code_free(pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy(const pcre2_code *);
|
||||
*pcre2_code_copy(const pcre2_code *); \
|
||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||
|
||||
|
||||
/* Functions that give information about a compiled pattern. */
|
||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
|||
|
||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||
|
|
|
@ -1042,6 +1042,45 @@ return newcode;
|
|||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Copy compiled code and character tables *
|
||||
*************************************************/
|
||||
|
||||
/* Compiled JIT code cannot be copied, so the new compiled block has no
|
||||
associated JIT data. This version of code_copy also makes a separate copy of
|
||||
the character tables. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
|
||||
pcre2_code_copy_with_tables(const pcre2_code *code)
|
||||
{
|
||||
PCRE2_SIZE* ref_count;
|
||||
pcre2_code *newcode;
|
||||
uint8_t *newtables;
|
||||
|
||||
if (code == NULL) return NULL;
|
||||
newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
|
||||
if (newcode == NULL) return NULL;
|
||||
memcpy(newcode, code, code->blocksize);
|
||||
newcode->executable_jit = NULL;
|
||||
|
||||
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
||||
code->memctl.memory_data);
|
||||
if (newtables == NULL)
|
||||
{
|
||||
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(newtables, code->tables, tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
||||
*ref_count = 1;
|
||||
|
||||
newcode->tables = newtables;
|
||||
newcode->flags |= PCRE2_DEREF_TABLES;
|
||||
return newcode;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Free compiled code *
|
||||
*************************************************/
|
||||
|
|
|
@ -427,15 +427,13 @@ so many of them that they are split into two fields. */
|
|||
#define CTL_NULLCONTEXT 0x00200000u
|
||||
#define CTL_POSIX 0x00400000u
|
||||
#define CTL_POSIX_NOSUB 0x00800000u
|
||||
#define CTL_PUSH 0x01000000u
|
||||
#define CTL_PUSHCOPY 0x02000000u
|
||||
#define CTL_STARTCHAR 0x04000000u
|
||||
#define CTL_USE_LENGTH 0x08000000u /* Same word as HEXPAT */
|
||||
#define CTL_UTF8_INPUT 0x10000000u
|
||||
#define CTL_ZERO_TERMINATE 0x20000000u
|
||||
|
||||
#define CTL_NL_SET 0x40000000u /* Informational */
|
||||
#define CTL_BSR_SET 0x80000000u /* Informational */
|
||||
#define CTL_PUSH 0x01000000u /* These three must be */
|
||||
#define CTL_PUSHCOPY 0x02000000u /* all in the same */
|
||||
#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */
|
||||
#define CTL_STARTCHAR 0x08000000u
|
||||
#define CTL_USE_LENGTH 0x10000000u /* Same word as HEXPAT */
|
||||
#define CTL_UTF8_INPUT 0x20000000u
|
||||
#define CTL_ZERO_TERMINATE 0x40000000u
|
||||
|
||||
/* Second control word */
|
||||
|
||||
|
@ -444,6 +442,9 @@ so many of them that they are split into two fields. */
|
|||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
|
||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
|
||||
|
||||
#define CTL_NL_SET 0x40000000u /* Informational */
|
||||
#define CTL_BSR_SET 0x80000000u /* Informational */
|
||||
|
||||
/* Combinations */
|
||||
|
||||
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
||||
|
@ -608,6 +609,7 @@ static modstruct modlist[] = {
|
|||
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
||||
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
|
||||
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
|
||||
{ "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
|
||||
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
|
||||
{ "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
|
||||
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
|
||||
|
@ -651,10 +653,10 @@ static modstruct modlist[] = {
|
|||
|
||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
||||
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
||||
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET| \
|
||||
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY| \
|
||||
CTL_USE_LENGTH)
|
||||
|
||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
|
||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL_BSR_SET|CTL_NL_SET)
|
||||
|
||||
/* Controls that apply only at compile time with 'push'. */
|
||||
|
||||
|
@ -664,7 +666,7 @@ static modstruct modlist[] = {
|
|||
/* Controls that are forbidden with #pop or #popcopy. */
|
||||
|
||||
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
|
||||
CTL_PUSHCOPY|CTL_USE_LENGTH)
|
||||
CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
|
||||
|
||||
/* Pattern controls that are mutually exclusive. At present these are all in
|
||||
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
|
||||
|
@ -674,6 +676,7 @@ static uint32_t exclusive_pat_controls[] = {
|
|||
CTL_POSIX | CTL_HEXPAT,
|
||||
CTL_POSIX | CTL_PUSH,
|
||||
CTL_POSIX | CTL_PUSHCOPY,
|
||||
CTL_POSIX | CTL_PUSHTABLESCOPY,
|
||||
CTL_POSIX | CTL_USE_LENGTH,
|
||||
CTL_EXPAND | CTL_HEXPAT };
|
||||
|
||||
|
@ -973,6 +976,14 @@ are supported. */
|
|||
else \
|
||||
a = (void *)pcre2_code_copy_32(G(b,32))
|
||||
|
||||
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
|
||||
else \
|
||||
a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
|
||||
|
||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
|
||||
|
@ -1436,6 +1447,12 @@ the three different cases. */
|
|||
else \
|
||||
a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
|
||||
|
||||
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
|
||||
else \
|
||||
a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
|
||||
|
||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
|
||||
|
@ -1773,6 +1790,7 @@ the three different cases. */
|
|||
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
|
||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
|
||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
|
||||
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
|
||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
|
@ -1868,6 +1886,7 @@ the three different cases. */
|
|||
(int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
|
||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
|
||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
|
||||
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
|
||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
|
@ -1963,6 +1982,7 @@ the three different cases. */
|
|||
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
|
||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
|
||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
|
||||
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
|
||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
|
||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||
|
@ -3435,8 +3455,8 @@ for (;;)
|
|||
#else
|
||||
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
||||
#endif
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
|
||||
else dctl->control &= ~CTL_BSR_SET;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_BSR_SET;
|
||||
else dctl->control2 &= ~CTL_BSR_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3445,8 +3465,8 @@ for (;;)
|
|||
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
|
||||
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
||||
else goto INVALID_VALUE;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
|
||||
else dctl->control |= CTL_BSR_SET;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_BSR_SET;
|
||||
else dctl->control2 |= CTL_BSR_SET;
|
||||
}
|
||||
pp = ep;
|
||||
break;
|
||||
|
@ -3513,14 +3533,14 @@ for (;;)
|
|||
if (i == 0)
|
||||
{
|
||||
*((uint16_t *)field) = NEWLINE_DEFAULT;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
|
||||
else dctl->control &= ~CTL_NL_SET;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_NL_SET;
|
||||
else dctl->control2 &= ~CTL_NL_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
*((uint16_t *)field) = i;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
|
||||
else dctl->control |= CTL_NL_SET;
|
||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_NL_SET;
|
||||
else dctl->control2 |= CTL_NL_SET;
|
||||
}
|
||||
pp = ep;
|
||||
break;
|
||||
|
@ -3691,7 +3711,7 @@ Returns: nothing
|
|||
static void
|
||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||
{
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
before,
|
||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||
|
@ -3699,7 +3719,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
|||
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
|
||||
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
||||
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
||||
((controls & CTL_BSR_SET) != 0)? " bsr" : "",
|
||||
((controls2 & CTL_BSR_SET) != 0)? " bsr" : "",
|
||||
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
||||
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
|
||||
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
||||
|
@ -3715,12 +3735,13 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
|||
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
||||
((controls & CTL_MARK) != 0)? " mark" : "",
|
||||
((controls & CTL_MEMORY) != 0)? " memory" : "",
|
||||
((controls & CTL_NL_SET) != 0)? " newline" : "",
|
||||
((controls2 & CTL_NL_SET) != 0)? " newline" : "",
|
||||
((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
|
||||
((controls & CTL_POSIX) != 0)? " posix" : "",
|
||||
((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
|
||||
((controls & CTL_PUSH) != 0)? " push" : "",
|
||||
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
||||
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
|
||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||
|
@ -4061,7 +4082,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
|
||||
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
||||
|
||||
if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
|
||||
if ((pat_patctl.control2 & CTL_BSR_SET) != 0 ||
|
||||
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
|
||||
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
||||
"any Unicode newline" : "CR, LF, or CRLF");
|
||||
|
@ -4930,7 +4951,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
|||
/* Handle compiling via the native interface. Controls that act later are
|
||||
ignored with "push". Replacements are locked out. */
|
||||
|
||||
if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0)
|
||||
if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
|
||||
{
|
||||
if (pat_patctl.replacement[0] != 0)
|
||||
{
|
||||
|
@ -5031,7 +5052,7 @@ if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
|
|||
appropriate default newline setting, local_newline_default will be non-zero. We
|
||||
use this if there is no explicit newline modifier. */
|
||||
|
||||
if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
|
||||
if ((pat_patctl.control2 & CTL_NL_SET) == 0 && local_newline_default != 0)
|
||||
{
|
||||
SETFLD(pat_context, newline_convention, local_newline_default);
|
||||
}
|
||||
|
@ -5163,7 +5184,7 @@ if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
|||
/* If an explicit newline modifier was given, set the information flag in the
|
||||
pattern so that it is preserved over push/pop. */
|
||||
|
||||
if ((pat_patctl.control & CTL_NL_SET) != 0)
|
||||
if ((pat_patctl.control2 & CTL_NL_SET) != 0)
|
||||
{
|
||||
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
|
||||
}
|
||||
|
@ -5191,18 +5212,26 @@ if ((pat_patctl.control & CTL_PUSH) != 0)
|
|||
SET(compiled_code, NULL);
|
||||
}
|
||||
|
||||
/* The "pushcopy" control is similar, but pushes a copy of the pattern. This
|
||||
tests the pcre2_code_copy() function. */
|
||||
/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
|
||||
copy of the pattern, the latter with a copy of its character tables. This tests
|
||||
the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
|
||||
|
||||
if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
|
||||
if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
|
||||
{
|
||||
if (patstacknext >= PATSTACKSIZE)
|
||||
{
|
||||
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
|
||||
return PR_ABEND;
|
||||
}
|
||||
if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
|
||||
{
|
||||
PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
|
||||
}
|
||||
else
|
||||
{
|
||||
PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
|
||||
compiled_code); }
|
||||
}
|
||||
|
||||
return PR_OK;
|
||||
}
|
||||
|
|
|
@ -88,4 +88,13 @@
|
|||
|
||||
#pop should give an error
|
||||
|
||||
/abcd/pushtablescopy
|
||||
abcd
|
||||
|
||||
#popcopy
|
||||
abcd
|
||||
|
||||
#pop
|
||||
abcd
|
||||
|
||||
# End of testinput20
|
||||
|
|
|
@ -135,4 +135,16 @@ Serialization failed: error -30: patterns do not all use the same character tabl
|
|||
#pop should give an error
|
||||
** Can't pop off an empty stack
|
||||
|
||||
/abcd/pushtablescopy
|
||||
abcd
|
||||
0: abcd
|
||||
|
||||
#popcopy
|
||||
abcd
|
||||
0: abcd
|
||||
|
||||
#pop
|
||||
abcd
|
||||
0: abcd
|
||||
|
||||
# End of testinput20
|
||||
|
|
Loading…
Reference in New Issue