Add pcre2_code_copy_with_tables().
This commit is contained in:
parent
43e541adda
commit
2aec84e37e
|
@ -181,6 +181,9 @@ wrong name.
|
||||||
|
|
||||||
27. In pcre2test, give some offset information for errors in hex patterns.
|
27. In pcre2test, give some offset information for errors in hex patterns.
|
||||||
|
|
||||||
|
28. Implemented pcre2_code_copy_with_tables(), and added pushtablescopy to
|
||||||
|
pcre2test for testing it.
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
@ -250,7 +253,7 @@ a report of compiler warnings from Visual Studio 2013 and a few tests with
|
||||||
gcc's -Wconversion (which still throws up a lot).
|
gcc's -Wconversion (which still throws up a lot).
|
||||||
|
|
||||||
15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test
|
15. Implemented pcre2_code_copy(), and added pushcopy and #popcopy to pcre2test
|
||||||
for testing it.
|
for testing it.
|
||||||
|
|
||||||
16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of
|
16. Change 66 for 10.21 introduced the use of snprintf() in PCRE2's version of
|
||||||
regerror(). When the error buffer is too small, my version of snprintf() puts a
|
regerror(). When the error buffer is too small, my version of snprintf() puts a
|
||||||
|
|
|
@ -25,6 +25,7 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2.html \
|
doc/html/pcre2.html \
|
||||||
doc/html/pcre2_callout_enumerate.html \
|
doc/html/pcre2_callout_enumerate.html \
|
||||||
doc/html/pcre2_code_copy.html \
|
doc/html/pcre2_code_copy.html \
|
||||||
|
doc/html/pcre2_code_copy_with_tables.html \
|
||||||
doc/html/pcre2_code_free.html \
|
doc/html/pcre2_code_free.html \
|
||||||
doc/html/pcre2_compile.html \
|
doc/html/pcre2_compile.html \
|
||||||
doc/html/pcre2_compile_context_copy.html \
|
doc/html/pcre2_compile_context_copy.html \
|
||||||
|
@ -107,6 +108,7 @@ dist_man_MANS = \
|
||||||
doc/pcre2.3 \
|
doc/pcre2.3 \
|
||||||
doc/pcre2_callout_enumerate.3 \
|
doc/pcre2_callout_enumerate.3 \
|
||||||
doc/pcre2_code_copy.3 \
|
doc/pcre2_code_copy.3 \
|
||||||
|
doc/pcre2_code_copy_with_tables.3 \
|
||||||
doc/pcre2_code_free.3 \
|
doc/pcre2_code_free.3 \
|
||||||
doc/pcre2_compile.3 \
|
doc/pcre2_compile.3 \
|
||||||
doc/pcre2_compile_context_copy.3 \
|
doc/pcre2_compile_context_copy.3 \
|
||||||
|
|
|
@ -174,7 +174,11 @@ can skip ahead to the CMake section.
|
||||||
|
|
||||||
(11) If you want to use the pcre2grep command, compile and link
|
(11) If you want to use the pcre2grep command, compile and link
|
||||||
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
src/pcre2grep.c; it uses only the basic 8-bit PCRE2 library (it does not
|
||||||
need the pcre2posix library).
|
need the pcre2posix library). If you have built the PCRE2 library with JIT
|
||||||
|
support by defining SUPPORT_JIT in src/config.h, you can also define
|
||||||
|
SUPPORT_PCRE2GREP_JIT, which causes pcre2grep to make use of JIT (unless
|
||||||
|
it is run with --no-jit). If you define SUPPORT_PCRE2GREP_JIT without
|
||||||
|
defining SUPPORT_JIT, pcre2grep does not try to make use of JIT.
|
||||||
|
|
||||||
|
|
||||||
STACK SIZE IN WINDOWS ENVIRONMENTS
|
STACK SIZE IN WINDOWS ENVIRONMENTS
|
||||||
|
@ -389,4 +393,4 @@ and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||||
recommended download site.
|
recommended download site.
|
||||||
|
|
||||||
=============================
|
=============================
|
||||||
Last Updated: 16 July 2015
|
Last Updated: 13 October 2016
|
||||||
|
|
|
@ -44,7 +44,7 @@ wrappers.
|
||||||
|
|
||||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||||
man page). These can be found in a library called libpcre2posix. Note that this
|
man page). These can be found in a library called libpcre2-posix. Note that this
|
||||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||||
and does not give full access to all of PCRE2's facilities.
|
and does not give full access to all of PCRE2's facilities.
|
||||||
|
@ -58,8 +58,8 @@ renamed or pointed at by a link.
|
||||||
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
If you are using the POSIX interface to PCRE2 and there is already a POSIX
|
||||||
regex library installed on your system, as well as worrying about the regex.h
|
regex library installed on your system, as well as worrying about the regex.h
|
||||||
header file (as mentioned above), you must also take care when linking programs
|
header file (as mentioned above), you must also take care when linking programs
|
||||||
to ensure that they link with PCRE2's libpcre2posix library. Otherwise they may
|
to ensure that they link with PCRE2's libpcre2-posix library. Otherwise they
|
||||||
pick up the POSIX functions of the same name from the other library.
|
may pick up the POSIX functions of the same name from the other library.
|
||||||
|
|
||||||
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
One way of avoiding this confusion is to compile PCRE2 with the addition of
|
||||||
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
-Dregcomp=PCRE2regcomp (and similarly for the other POSIX functions) to the
|
||||||
|
@ -204,13 +204,6 @@ library. They are also documented in the pcre2build man page.
|
||||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||||
--enable-newline-is-any to the "configure" command, respectively.
|
--enable-newline-is-any to the "configure" command, respectively.
|
||||||
|
|
||||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
|
||||||
the standard tests will fail, because the lines in the test files end with
|
|
||||||
LF. Even if the files are edited to change the line endings, there are likely
|
|
||||||
to be some failures. With --enable-newline-is-anycrlf or
|
|
||||||
--enable-newline-is-any, many tests should succeed, but there may be some
|
|
||||||
failures.
|
|
||||||
|
|
||||||
. By default, the sequence \R in a pattern matches any Unicode line ending
|
. By default, the sequence \R in a pattern matches any Unicode line ending
|
||||||
sequence. This is independent of the option specifying what PCRE2 considers
|
sequence. This is independent of the option specifying what PCRE2 considers
|
||||||
to be the end of a line (see above). However, the caller of PCRE2 can
|
to be the end of a line (see above). However, the caller of PCRE2 can
|
||||||
|
@ -253,13 +246,13 @@ library. They are also documented in the pcre2build man page.
|
||||||
sizes in the pcre2stack man page.
|
sizes in the pcre2stack man page.
|
||||||
|
|
||||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
to different parts of the compiled pattern. In the 16-bit library,
|
||||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
--with-link-size=3 is the same as --with-link-size=4, which (in both
|
||||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
libraries) uses four-byte offsets. Increasing the internal link size reduces
|
||||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
performance in the 8-bit and 16-bit libraries. In the 32-bit library, the
|
||||||
ignored, as 4-byte offsets are always used.
|
link size setting is ignored, as 4-byte offsets are always used.
|
||||||
|
|
||||||
. You can build PCRE2 so that its internal match() function that is called from
|
. You can build PCRE2 so that its internal match() function that is called from
|
||||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||||
|
@ -339,12 +332,23 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
Of course, the relevant libraries must be installed on your system.
|
Of course, the relevant libraries must be installed on your system.
|
||||||
|
|
||||||
. The default size (in bytes) of the internal buffer used by pcre2grep can be
|
. The default starting size (in bytes) of the internal buffer used by pcre2grep
|
||||||
set by, for example:
|
can be set by, for example:
|
||||||
|
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
|
|
||||||
The value must be a plain integer. The default is 20480.
|
The value must be a plain integer. The default is 20480. The amount of memory
|
||||||
|
used by pcre2grep is actually three times this number, to allow for "before"
|
||||||
|
and "after" lines. If very long lines are encountered, the buffer is
|
||||||
|
automatically enlarged, up to a fixed maximum size.
|
||||||
|
|
||||||
|
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||||
|
example:
|
||||||
|
|
||||||
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
|
|
||||||
|
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||||
|
whichever is the larger.
|
||||||
|
|
||||||
. It is possible to compile pcre2test so that it links with the libreadline
|
. It is possible to compile pcre2test so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
|
@ -368,6 +372,22 @@ library. They are also documented in the pcre2build man page.
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
|
||||||
|
. There is a special option called --enable-fuzz-support for use by people who
|
||||||
|
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||||
|
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||||
|
be built, but not installed. This contains a single function called
|
||||||
|
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||||
|
length of the string. When called, this function tries to compile the string
|
||||||
|
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||||
|
options and with some random options bits that are generated from the string.
|
||||||
|
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||||
|
be created. This is normally run under valgrind or used when PCRE2 is
|
||||||
|
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||||
|
outputs information about what it is doing. The input strings are specified by
|
||||||
|
arguments: if an argument starts with "=" the rest of it is a literal input
|
||||||
|
string. Otherwise, it is assumed to be a file name, and the contents of the
|
||||||
|
file are the test string.
|
||||||
|
|
||||||
The "configure" script builds the following files for the basic C library:
|
The "configure" script builds the following files for the basic C library:
|
||||||
|
|
||||||
|
@ -543,7 +563,7 @@ script creates the .txt and HTML forms of the documentation from the man pages.
|
||||||
|
|
||||||
|
|
||||||
Testing PCRE2
|
Testing PCRE2
|
||||||
------------
|
-------------
|
||||||
|
|
||||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||||
There is another script called RunGrepTest that tests the pcre2grep command.
|
There is another script called RunGrepTest that tests the pcre2grep command.
|
||||||
|
@ -757,6 +777,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_xclass.c )
|
src/pcre2_xclass.c )
|
||||||
|
|
||||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||||
|
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||||
|
|
||||||
src/config.h.in template for config.h, when built by "configure"
|
src/config.h.in template for config.h, when built by "configure"
|
||||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||||
|
@ -814,7 +835,7 @@ The distribution should contain the files listed below.
|
||||||
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
libpcre2-8.pc.in template for libpcre2-8.pc for pkg-config
|
||||||
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
libpcre2-16.pc.in template for libpcre2-16.pc for pkg-config
|
||||||
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
libpcre2-32.pc.in template for libpcre2-32.pc for pkg-config
|
||||||
libpcre2posix.pc.in template for libpcre2posix.pc for pkg-config
|
libpcre2-posix.pc.in template for libpcre2-posix.pc for pkg-config
|
||||||
ltmain.sh file used to build a libtool script
|
ltmain.sh file used to build a libtool script
|
||||||
missing ) common stub for a few missing GNU programs while
|
missing ) common stub for a few missing GNU programs while
|
||||||
) installing, generated by automake
|
) installing, generated by automake
|
||||||
|
@ -845,4 +866,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 01 April 2016
|
Last updated: 01 November 2016
|
||||||
|
|
|
@ -94,6 +94,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
||||||
<td> Copy a compiled pattern</td></tr>
|
<td> Copy a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_code_copy_with_tables.html">pcre2_code_copy_with_tables</a></td>
|
||||||
|
<td> Copy a compiled pattern and its character tables</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||||
<td> Free a compiled pattern</td></tr>
|
<td> Free a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -28,8 +28,9 @@ DESCRIPTION
|
||||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
any memory used by the JIT compiler. Without a subsequent call to
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching. The
|
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching. The
|
||||||
yield of the function is NULL if <i>code</i> is NULL or if sufficient memory
|
pointer to the character tables is copied, not the tables themselves (see
|
||||||
cannot be obtained.
|
<b>pcre2_code_copy_with_tables()</b>). The yield of the function is NULL if
|
||||||
|
<i>code</i> is NULL or if sufficient memory cannot be obtained.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre2_code_copy_with_tables specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre2_code_copy_with_tables man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
|
please consult the man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include <pcre2.h></b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
|
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching.
|
||||||
|
Unlike <b>pcre2_code_copy()</b>, a separate copy of the character tables is also
|
||||||
|
made, with the new code pointing to it. This memory will be automatically freed
|
||||||
|
when <b>pcre2_code_free()</b> is called. The yield of the function is NULL if
|
||||||
|
<i>code</i> is NULL or if sufficient memory cannot be obtained.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
|
@ -26,8 +26,11 @@ SYNOPSIS
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This function sets, in a compile context, the maximum length (in code units) of
|
This function sets, in a compile context, the maximum text length (in code
|
||||||
the pattern that can be compiled. The result is always zero.
|
units) of the pattern that can be compiled. The result is always zero. If a
|
||||||
|
longer pattern is passed to <b>pcre2_compile()</b> there is an immediate error
|
||||||
|
return. The default is effectively unlimited, being the largest value a
|
||||||
|
PCRE2_SIZE variable can hold.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -294,6 +294,9 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -567,8 +570,9 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
||||||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||||
required. JIT compilation updates a pointer within the compiled code block, so
|
required. JIT compilation updates a pointer within the compiled code block, so
|
||||||
a thread must gain unique write access to the pointer before calling
|
a thread must gain unique write access to the pointer before calling
|
||||||
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> can be used
|
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
||||||
to obtain a private copy of the compiled code.
|
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
||||||
|
compiled code.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Context blocks
|
Context blocks
|
||||||
|
@ -736,7 +740,8 @@ functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
|
||||||
<br>
|
<br>
|
||||||
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||||
using up too much system stack when being compiled.
|
using up too much system stack when being compiled. The limit applies to
|
||||||
|
parentheses of all kinds, not just capturing parentheses.
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1058,6 +1063,9 @@ zero.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
<b>pcre2_code *pcre2_code_copy(const pcre2_code *<i>code</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *<i>code</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
||||||
|
@ -1079,9 +1087,22 @@ if the code has been processed by the JIT compiler (see
|
||||||
<a href="#jitcompiling">below),</a>
|
<a href="#jitcompiling">below),</a>
|
||||||
the JIT information cannot be copied (because it is position-dependent).
|
the JIT information cannot be copied (because it is position-dependent).
|
||||||
The new copy can initially be used only for non-JIT matching, though it can be
|
The new copy can initially be used only for non-JIT matching, though it can be
|
||||||
passed to <b>pcre2_jit_compile()</b> if required. The <b>pcre2_code_copy()</b>
|
passed to <b>pcre2_jit_compile()</b> if required.
|
||||||
function provides a way for individual threads in a multithreaded application
|
</P>
|
||||||
to acquire a private copy of shared compiled code.
|
<P>
|
||||||
|
The <b>pcre2_code_copy()</b> function provides a way for individual threads in a
|
||||||
|
multithreaded application to acquire a private copy of shared compiled code.
|
||||||
|
However, it does not make a copy of the character tables used by the compiled
|
||||||
|
pattern; the new pattern code points to the same tables as the original code.
|
||||||
|
(See
|
||||||
|
<a href="#localesupport">"Locale Support"</a>
|
||||||
|
below for details of these character tables.) In many applications the same
|
||||||
|
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||||
|
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||||
|
are needed. The <b>pcre2_code_copy_with_tables()</b> function provides this facility.
|
||||||
|
Copies of both the code and the tables are made, with the new code pointing to
|
||||||
|
the new tables. The memory for the new tables is automatically freed when
|
||||||
|
<b>pcre2_code_free()</b> is called for the new copy of the compiled code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||||
|
@ -1119,7 +1140,14 @@ NULL immediately. Otherwise, the variables to which these point are set to an
|
||||||
error code and an offset (number of code units) within the pattern,
|
error code and an offset (number of code units) within the pattern,
|
||||||
respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
||||||
error has occurred. The values are not defined when compilation is successful
|
error has occurred. The values are not defined when compilation is successful
|
||||||
and <b>pcre2_compile()</b> returns a non-NULL value.
|
and <b>pcre2_compile()</b> returns a non-NULL value.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The value returned in <i>erroroffset</i> is an indication of where in the
|
||||||
|
pattern the error occurred. It is not necessarily the furthest point in the
|
||||||
|
pattern that was read. For example, after the error "lookbehind assertion is
|
||||||
|
not fixed length", the error offset points to the start of the failing
|
||||||
|
assertion.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
||||||
|
@ -1215,8 +1243,8 @@ recognized, exactly as in the rest of the pattern.
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
</pre>
|
</pre>
|
||||||
If this bit is set, <b>pcre2_compile()</b> automatically inserts callout items,
|
If this bit is set, <b>pcre2_compile()</b> automatically inserts callout items,
|
||||||
all with number 255, before each pattern item. For discussion of the callout
|
all with number 255, before each pattern item, except immediately before or
|
||||||
facility, see the
|
after a callout in the pattern. For discussion of the callout facility, see the
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -3235,7 +3263,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 June 2016
|
Last updated: 22 November 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -34,9 +34,10 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC19" href="#SEC19">INCLUDING DEBUGGING CODE</a>
|
<li><a name="TOC19" href="#SEC19">INCLUDING DEBUGGING CODE</a>
|
||||||
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
|
<li><a name="TOC20" href="#SEC20">DEBUGGING WITH VALGRIND SUPPORT</a>
|
||||||
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
|
<li><a name="TOC21" href="#SEC21">CODE COVERAGE REPORTING</a>
|
||||||
<li><a name="TOC22" href="#SEC22">SEE ALSO</a>
|
<li><a name="TOC22" href="#SEC22">SUPPORT FOR FUZZERS</a>
|
||||||
<li><a name="TOC23" href="#SEC23">AUTHOR</a>
|
<li><a name="TOC23" href="#SEC23">SEE ALSO</a>
|
||||||
<li><a name="TOC24" href="#SEC24">REVISION</a>
|
<li><a name="TOC24" href="#SEC24">AUTHOR</a>
|
||||||
|
<li><a name="TOC25" href="#SEC25">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
<br><a name="SEC1" href="#TOC1">BUILDING PCRE2</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -376,16 +377,19 @@ they are not.
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
<b>pcre2grep</b> uses an internal buffer to hold a "window" on the file it is
|
||||||
scanning, in order to be able to output "before" and "after" lines when it
|
scanning, in order to be able to output "before" and "after" lines when it
|
||||||
finds a match. The size of the buffer is controlled by a parameter whose
|
finds a match. The starting size of the buffer is controlled by a parameter
|
||||||
default value is 20K. The buffer itself is three times this size, but because
|
whose default value is 20K. The buffer itself is three times this size, but
|
||||||
of the way it is used for holding "before" lines, the longest line that is
|
because of the way it is used for holding "before" lines, the longest line that
|
||||||
guaranteed to be processable is the parameter size. You can change the default
|
is guaranteed to be processable is the parameter size. If a longer line is
|
||||||
parameter value by adding, for example,
|
encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
|
||||||
|
specified maximum size, whose default is 1M or the starting size, whichever is
|
||||||
|
the larger. You can change the default parameter values by adding, for example,
|
||||||
<pre>
|
<pre>
|
||||||
--with-pcre2grep-bufsize=50K
|
--with-pcre2grep-bufsize=51200
|
||||||
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override this
|
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
|
||||||
value by using --buffer-size on the command line.
|
these values by using --buffer-size and --max-buffer-size on the command line.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -497,11 +501,32 @@ This cleans all coverage data including the generated coverage report. For more
|
||||||
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
information about code coverage, see the <b>gcov</b> and <b>lcov</b>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC22" href="#TOC1">SUPPORT FOR FUZZERS</a><br>
|
||||||
|
<P>
|
||||||
|
There is a special option for use by people who want to run fuzzing tests on
|
||||||
|
PCRE2:
|
||||||
|
<pre>
|
||||||
|
--enable-fuzz-support
|
||||||
|
</pre>
|
||||||
|
At present this applies only to the 8-bit library. If set, it causes an extra
|
||||||
|
library called libpcre2-fuzzsupport.a to be built, but not installed. This
|
||||||
|
contains a single function called LLVMFuzzerTestOneInput() whose arguments are
|
||||||
|
a pointer to a string and the length of the string. When called, this function
|
||||||
|
tries to compile the string as a pattern, and if that succeeds, to match it.
|
||||||
|
This is done both with no options and with some random options bits that are
|
||||||
|
generated from the string. Setting --enable-fuzz-support also causes a binary
|
||||||
|
called <b>pcre2fuzzcheck</b> to be created. This is normally run under valgrind
|
||||||
|
or used when PCRE2 is compiled with address sanitizing enabled. It calls the
|
||||||
|
fuzzing function and outputs information about what it is doing. The input strings
|
||||||
|
are specified by arguments: if an argument starts with "=" the rest of it is a
|
||||||
|
literal input string. Otherwise, it is assumed to be a file name, and the
|
||||||
|
contents of the file are the test string.
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC23" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
<b>pcre2api</b>(3), <b>pcre2-config</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC24" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -510,9 +535,9 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC24" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 01 April 2016
|
Last updated: 01 November 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -57,11 +57,20 @@ two callout points:
|
||||||
</pre>
|
</pre>
|
||||||
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
If the PCRE2_AUTO_CALLOUT option bit is set when a pattern is compiled, PCRE2
|
||||||
automatically inserts callouts, all with number 255, before each item in the
|
automatically inserts callouts, all with number 255, before each item in the
|
||||||
pattern. For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
pattern except for immediately before or after a callout item in the pattern.
|
||||||
|
For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||||
|
<pre>
|
||||||
|
A(?C3)B
|
||||||
|
</pre>
|
||||||
|
it is processed as if it were
|
||||||
|
<pre>
|
||||||
|
(?C255)A(?C3)B(?C255)
|
||||||
|
</pre>
|
||||||
|
Here is a more complicated example:
|
||||||
<pre>
|
<pre>
|
||||||
A(\d{2}|--)
|
A(\d{2}|--)
|
||||||
</pre>
|
</pre>
|
||||||
it is processed as if it were
|
With PCRE2_AUTO_CALLOUT, this pattern is processed as if it were
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
|
||||||
|
@ -107,10 +116,10 @@ with PCRE2_ANCHORED and PCRE2_AUTO_CALLOUT and then applied to the string
|
||||||
No match
|
No match
|
||||||
</pre>
|
</pre>
|
||||||
This indicates that when matching [bc] fails, there is no backtracking into a+
|
This indicates that when matching [bc] fails, there is no backtracking into a+
|
||||||
and therefore the callouts that would be taken for the backtracks do not occur.
|
(because it is being treated as a++) and therefore the callouts that would be
|
||||||
You can disable the auto-possessify feature by passing PCRE2_NO_AUTO_POSSESS to
|
taken for the backtracks do not occur. You can disable the auto-possessify
|
||||||
<b>pcre2_compile()</b>, or starting the pattern with (*NO_AUTO_POSSESS). In this
|
feature by passing PCRE2_NO_AUTO_POSSESS to <b>pcre2_compile()</b>, or starting
|
||||||
case, the output changes to this:
|
the pattern with (*NO_AUTO_POSSESS). In this case, the output changes to this:
|
||||||
<pre>
|
<pre>
|
||||||
--->aaaa
|
--->aaaa
|
||||||
+0 ^ a+
|
+0 ^ a+
|
||||||
|
@ -235,8 +244,8 @@ Fields for numerical callouts
|
||||||
<P>
|
<P>
|
||||||
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
|
For a numerical callout, <i>callout_string</i> is NULL, and <i>callout_number</i>
|
||||||
contains the number of the callout, in the range 0-255. This is the number
|
contains the number of the callout, in the range 0-255. This is the number
|
||||||
that follows (?C for manual callouts; it is 255 for automatically generated
|
that follows (?C for callouts that are part of the pattern; it is 255 for
|
||||||
callouts.
|
automatically generated callouts.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Fields for string callouts
|
Fields for string callouts
|
||||||
|
@ -310,10 +319,15 @@ the next item to be matched.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>next_item_length</i> field contains the length of the next item to be
|
The <i>next_item_length</i> field contains the length of the next item to be
|
||||||
matched in the pattern string. When the callout immediately precedes an
|
processed in the pattern string. When the callout is at the end of the pattern,
|
||||||
alternation bar, a closing parenthesis, or the end of the pattern, the length
|
the length is zero. When the callout precedes an opening parenthesis, the
|
||||||
is zero. When the callout precedes an opening parenthesis, the length is that
|
length includes meta characters that follow the parenthesis. For example, in a
|
||||||
of the entire subpattern.
|
callout before an assertion such as (?=ab) the length is 3. For an
|
||||||
|
alternation bar or a closing parenthesis, the length is one, unless a closing
|
||||||
|
parenthesis is followed by a quantifier, in which case its length is included.
|
||||||
|
(This changed in release 10.23. In earlier releases, before an opening
|
||||||
|
parenthesis the length was that of the entire subpattern, and before an
|
||||||
|
alternation bar or a closing parenthesis the length was zero.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
|
||||||
|
@ -399,9 +413,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 March 2015
|
Last updated: 29 September 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -107,7 +107,7 @@ processed as anchored at the point where they are tested.
|
||||||
one that is backtracked onto acts. For example, in the pattern
|
one that is backtracked onto acts. For example, in the pattern
|
||||||
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C
|
||||||
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the
|
||||||
same as PCRE2, but there are examples where it differs.
|
same as PCRE2, but there are cases where it differs.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
11. Most backtracking verbs in assertions have their normal actions. They are
|
11. Most backtracking verbs in assertions have their normal actions. They are
|
||||||
|
@ -123,7 +123,7 @@ the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to
|
||||||
13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
|
13. PCRE2's handling of duplicate subpattern numbers and duplicate subpattern
|
||||||
names is not as general as Perl's. This is a consequence of the fact that PCRE2
|
names is not as general as Perl's. This is a consequence of the fact that PCRE2
|
||||||
works internally just with numbers, using an external table to translate
|
works internally just with numbers, using an external table to translate
|
||||||
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b)B),
|
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
|
||||||
where the two capturing parentheses have the same number but different names,
|
where the two capturing parentheses have the same number but different names,
|
||||||
is not supported, and causes an error at compile time. If it were allowed, it
|
is not supported, and causes an error at compile time. If it were allowed, it
|
||||||
would not be possible to distinguish which parentheses matched, because both
|
would not be possible to distinguish which parentheses matched, because both
|
||||||
|
@ -131,10 +131,11 @@ names map to capturing subpattern number 1. To avoid this confusing situation,
|
||||||
an error is given at compile time.
|
an error is given at compile time.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
14. Perl recognizes comments in some places that PCRE2 does not, for example,
|
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
||||||
between the ( and ? at the start of a subpattern. If the /x modifier is set,
|
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
||||||
Perl allows white space between ( and ? (though current Perls warn that this is
|
is set, Perl allowed white space between ( and ? though the latest Perls give
|
||||||
deprecated) but PCRE2 never does, even if the PCRE2_EXTENDED option is set.
|
an error (for a while it was just deprecated). There may still be some cases
|
||||||
|
where Perl behaves differently.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
15. Perl, when in warning mode, gives warnings for character classes such as
|
15. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
@ -158,45 +159,50 @@ list is with respect to Perl 5.10:
|
||||||
<br>
|
<br>
|
||||||
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
||||||
each alternative branch of a lookbehind assertion can match a different length
|
each alternative branch of a lookbehind assertion can match a different length
|
||||||
of string. Perl requires them all to have the same length.
|
of string. Perl requires them all to have the same length.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(b) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
|
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
||||||
|
in lookbehinds, provided that there is no possibility of referencing a
|
||||||
|
non-unique number or name. Perl does not support backreferences in lookbehinds.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
(c) If PCRE2_DOLLAR_ENDONLY is set and PCRE2_MULTILINE is not set, the $
|
||||||
meta-character matches only at the very end of the string.
|
meta-character matches only at the very end of the string.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(c) A backslash followed by a letter with no special meaning is faulted. (Perl
|
(d) A backslash followed by a letter with no special meaning is faulted. (Perl
|
||||||
can be made to issue a warning.)
|
can be made to issue a warning.)
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(d) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
|
(e) If PCRE2_UNGREEDY is set, the greediness of the repetition quantifiers is
|
||||||
inverted, that is, by default they are not greedy, but if followed by a
|
inverted, that is, by default they are not greedy, but if followed by a
|
||||||
question mark they are.
|
question mark they are.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(e) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
|
(f) PCRE2_ANCHORED can be used at matching time to force a pattern to be tried
|
||||||
only at the first matching position in the subject string.
|
only at the first matching position in the subject string.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(f) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
|
(g) The PCRE2_NOTBOL, PCRE2_NOTEOL, PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, and
|
||||||
PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
|
PCRE2_NO_AUTO_CAPTURE options have no Perl equivalents.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
(h) The \R escape sequence can be restricted to match only CR, LF, or CRLF
|
||||||
by the PCRE2_BSR_ANYCRLF option.
|
by the PCRE2_BSR_ANYCRLF option.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(h) The callout facility is PCRE2-specific.
|
(i) The callout facility is PCRE2-specific.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(i) The partial matching facility is PCRE2-specific.
|
(j) The partial matching facility is PCRE2-specific.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(j) The alternative matching function (<b>pcre2_dfa_match()</b> matches in a
|
(k) The alternative matching function (<b>pcre2_dfa_match()</b>) matches in a
|
||||||
different way and is not Perl-compatible.
|
different way and is not Perl-compatible.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(k) PCRE2 recognizes some special sequences such as (*CR) at the start of
|
(l) PCRE2 recognizes some special sequences such as (*CR) at the start of
|
||||||
a pattern that set overall options that cannot be changed within the pattern.
|
a pattern that set overall options that cannot be changed within the pattern.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -214,9 +220,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 15 March 2015
|
Last updated: 18 October 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -80,11 +80,19 @@ span line boundaries. What defines a line boundary is controlled by the
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
controlled by a parameter that can be set by the <b>--buffer-size</b> option.
|
controlled by parameters that can be set by the <b>--buffer-size</b> and
|
||||||
The default value for this parameter is specified when <b>pcre2grep</b> is
|
<b>--max-buffer-size</b> options. The first of these sets the size of buffer
|
||||||
built, with the default default being 20K. A block of memory three times this
|
that is obtained at the start of processing. If an input file contains very
|
||||||
size is used (to allow for buffering "before" and "after" lines). An error
|
long lines, a larger buffer may be needed; this is handled by automatically
|
||||||
occurs if a line overflows the buffer.
|
extending the buffer, up to the limit specified by <b>--max-buffer-size</b>. The
|
||||||
|
default values for these parameters are specified when <b>pcre2grep</b> is
|
||||||
|
built, with the default defaults being 20K and 1M respectively. An error occurs
|
||||||
|
if a line is too long and the buffer can no longer be expanded.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The block of memory that is actually used is three times the "buffer size", to
|
||||||
|
allow for buffering "before" and "after" lines. If the buffer size is too
|
||||||
|
small, fewer than requested "before" and "after" lines may be output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||||
|
@ -155,12 +163,13 @@ processing of patterns and file names that start with hyphens.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
|
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
|
||||||
Output <i>number</i> lines of context after each matching line. If file names
|
Output up to <i>number</i> lines of context after each matching line. Fewer
|
||||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
lines are output if the next match or the end of the file is reached, or if the
|
||||||
colon for the context lines. A line containing "--" is output between each
|
processing buffer size has been set too small. If file names and/or line
|
||||||
group of lines, unless they are in fact contiguous in the input file. The value
|
numbers are being output, a hyphen separator is used instead of a colon for the
|
||||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
context lines. A line containing "--" is output between each group of lines,
|
||||||
guarantees to have up to 8K of following text available for context output.
|
unless they are in fact contiguous in the input file. The value of <i>number</i>
|
||||||
|
is expected to be relatively small. When <b>-c</b> is used, <b>-A</b> is ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-a</b>, <b>--text</b>
|
<b>-a</b>, <b>--text</b>
|
||||||
|
@ -169,12 +178,14 @@ Treat binary files as text. This is equivalent to
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||||
Output <i>number</i> lines of context before each matching line. If file names
|
Output up to <i>number</i> lines of context before each matching line. Fewer
|
||||||
and/or line numbers are being output, a hyphen separator is used instead of a
|
lines are output if the previous match or the start of the file is within
|
||||||
colon for the context lines. A line containing "--" is output between each
|
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
||||||
group of lines, unless they are in fact contiguous in the input file. The value
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
of <i>number</i> is expected to be relatively small. However, <b>pcre2grep</b>
|
instead of a colon for the context lines. A line containing "--" is output
|
||||||
guarantees to have up to 8K of preceding text available for context output.
|
between each group of lines, unless they are in fact contiguous in the input
|
||||||
|
file. The value of <i>number</i> is expected to be relatively small. When
|
||||||
|
<b>-c</b> is used, <b>-B</b> is ignored.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--binary-files=</b><i>word</i>
|
<b>--binary-files=</b><i>word</i>
|
||||||
|
@ -191,8 +202,9 @@ return code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--buffer-size=</b><i>number</i>
|
<b>--buffer-size=</b><i>number</i>
|
||||||
Set the parameter that controls how much memory is used for buffering files
|
Set the parameter that controls how much memory is obtained at the start of
|
||||||
that are being scanned.
|
processing for buffering files that are being scanned. See also
|
||||||
|
<b>--max-buffer-size</b> below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
|
||||||
|
@ -202,14 +214,16 @@ This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
|
||||||
<P>
|
<P>
|
||||||
<b>-c</b>, <b>--count</b>
|
<b>-c</b>, <b>--count</b>
|
||||||
Do not output lines from the files that are being scanned; instead output the
|
Do not output lines from the files that are being scanned; instead output the
|
||||||
number of matches (or non-matches if <b>-v</b> is used) that would otherwise
|
number of lines that would have been shown, either because they matched, or, if
|
||||||
have caused lines to be shown. By default, this count is the same as the number
|
<b>-v</b> is set, because they failed to match. By default, this count is
|
||||||
of suppressed lines, but if the <b>-M</b> (multiline) option is used (without
|
exactly the same as the number of lines that would have been output, but if the
|
||||||
<b>-v</b>), there may be more suppressed lines than the number of matches.
|
<b>-M</b> (multiline) option is used (without <b>-v</b>), there may be more
|
||||||
|
suppressed lines than the count (that is, the number of matches).
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
If no lines are selected, the number zero is output. If several files are
|
If no lines are selected, the number zero is output. If several files are
|
||||||
being scanned, a count is output for each of them. However, if the
|
being scanned, a count is output for each of them and the <b>-t</b> option can
|
||||||
|
be used to cause a total to be output at the end. However, if the
|
||||||
<b>--files-with-matches</b> option is also used, only those files whose counts
|
<b>--files-with-matches</b> option is also used, only those files whose counts
|
||||||
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
||||||
<b>-B</b>, and <b>-C</b> options are ignored.
|
<b>-B</b>, and <b>-C</b> options are ignored.
|
||||||
|
@ -232,11 +246,12 @@ just one, in order to colour them all.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The colour that is used can be specified by setting the environment variable
|
The colour that is used can be specified by setting the environment variable
|
||||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The value of this variable should be a
|
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these is set,
|
||||||
string of two numbers, separated by a semicolon. They are copied directly into
|
<b>pcre2grep</b> looks for GREP_COLOUR or GREP_COLOR. The value of the variable
|
||||||
the control string for setting colour on a terminal, so it is your
|
should be a string of two numbers, separated by a semicolon. They are copied
|
||||||
responsibility to ensure that they make sense. If neither of the environment
|
directly into the control string for setting colour on a terminal, so it is
|
||||||
variables is set, the default is "1;31", which gives red.
|
your responsibility to ensure that they make sense. If neither of the
|
||||||
|
environment variables is set, the default is "1;31", which gives red.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||||
|
@ -321,24 +336,24 @@ files; it does not apply to patterns specified by any of the <b>--include</b> or
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
|
||||||
Read patterns from the file, one per line, and match them against
|
Read patterns from the file, one per line, and match them against each line of
|
||||||
each line of input. What constitutes a newline when reading the file is the
|
input. What constitutes a newline when reading the file is the operating
|
||||||
operating system's default. The <b>--newline</b> option has no effect on this
|
system's default. The <b>--newline</b> option has no effect on this option.
|
||||||
option. Trailing white space is removed from each line, and blank lines are
|
Trailing white space is removed from each line, and blank lines are ignored. An
|
||||||
ignored. An empty file contains no patterns and therefore matches nothing. See
|
empty file contains no patterns and therefore matches nothing. See also the
|
||||||
also the comments about multiple patterns versus a single pattern with
|
comments about multiple patterns versus a single pattern with alternatives in
|
||||||
alternatives in the description of <b>-e</b> above.
|
the description of <b>-e</b> above.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
If this option is given more than once, all the specified files are
|
If this option is given more than once, all the specified files are read. A
|
||||||
read. A data line is output if any of the patterns match it. A file name can
|
data line is output if any of the patterns match it. A file name can be given
|
||||||
be given as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
as "-" to refer to the standard input. When <b>-f</b> is used, patterns
|
||||||
specified on the command line using <b>-e</b> may also be present; they are
|
specified on the command line using <b>-e</b> may also be present; they are
|
||||||
tested before the file's patterns. However, no other pattern is taken from the
|
tested before the file's patterns. However, no other pattern is taken from the
|
||||||
command line; all arguments are treated as the names of paths to be searched.
|
command line; all arguments are treated as the names of paths to be searched.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--file-list</b>=<i>filename</i>
|
<b>--file-list</b>=<i>filename</i>
|
||||||
Read a list of files and/or directories that are to be scanned from the given
|
Read a list of files and/or directories that are to be scanned from the given
|
||||||
file, one per line. Trailing white space is removed from each line, and blank
|
file, one per line. Trailing white space is removed from each line, and blank
|
||||||
lines are ignored. These paths are processed before any that are listed on the
|
lines are ignored. These paths are processed before any that are listed on the
|
||||||
|
@ -502,22 +517,24 @@ There are no short forms for these options. The default settings are specified
|
||||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--max-buffer-size=</b><i>number</i>
|
||||||
|
This limits the expansion of the processing buffer, whose initial size can be
|
||||||
|
set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
|
||||||
|
smaller than the starting buffer size.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-M</b>, <b>--multiline</b>
|
<b>-M</b>, <b>--multiline</b>
|
||||||
Allow patterns to match more than one line. When this option is given, patterns
|
Allow patterns to match more than one line. When this option is set, the PCRE2
|
||||||
may usefully contain literal newline characters and internal occurrences of ^
|
library is called in "multiline" mode. This allows a matched string to extend
|
||||||
and $ characters. The output for a successful match may consist of more than
|
past the end of a line and continue on one or more subsequent lines. Patterns
|
||||||
one line. The first is the line in which the match started, and the last is the
|
used with <b>-M</b> may usefully contain literal newline characters and internal
|
||||||
line in which the match ended. If the matched string ends with a newline
|
occurrences of ^ and $ characters. The output for a successful match may
|
||||||
sequence the output ends at the end of that line.
|
consist of more than one line. The first line is the line in which the match
|
||||||
<br>
|
started, and the last line is the line in which the match ended. If the matched
|
||||||
<br>
|
string ends with a newline sequence, the output ends at the end of that line.
|
||||||
When this option is set, the PCRE2 library is called in "multiline" mode. This
|
If <b>-v</b> is set, none of the lines in a multi-line match are output. Once a
|
||||||
allows a matched string to extend past the end of a line and continue on one or
|
match has been handled, scanning restarts at the beginning of the line after
|
||||||
more subsequent lines. However, <b>pcre2grep</b> still processes the input line
|
the one in which the match ended.
|
||||||
by line. Once a match has been handled, scanning restarts at the beginning of
|
|
||||||
the next line, just as it does when <b>-M</b> is not present. This means that it
|
|
||||||
is possible for the second or subsequent lines in a multiline match to be
|
|
||||||
output again as part of another match.
|
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The newline sequence that separates multiple lines must be matched as part of
|
The newline sequence that separates multiple lines must be matched as part of
|
||||||
|
@ -533,11 +550,8 @@ well as possibly handling a two-character newline sequence.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
There is a limit to the number of lines that can be matched, imposed by the way
|
There is a limit to the number of lines that can be matched, imposed by the way
|
||||||
that <b>pcre2grep</b> buffers the input file as it scans it. However,
|
that <b>pcre2grep</b> buffers the input file as it scans it. With a sufficiently
|
||||||
<b>pcre2grep</b> ensures that at least 8K characters or the rest of the file
|
large processing buffer, this should not be a problem, but the <b>-M</b> option
|
||||||
(whichever is the shorter) are available for forward matching, and similarly
|
|
||||||
the previous 8K characters (or all the previous characters, if fewer than 8K)
|
|
||||||
are guaranteed to be available for lookbehind assertions. The <b>-M</b> option
|
|
||||||
does not work when input is read line by line (see <b>--line-buffered</b>.)
|
does not work when input is read line by line (see <b>--line-buffered</b>.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -585,12 +599,13 @@ It should never be needed in normal use.
|
||||||
Show only the part of the line that matched a pattern instead of the whole
|
Show only the part of the line that matched a pattern instead of the whole
|
||||||
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
line. In this mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and
|
||||||
<b>-C</b> options are ignored. If there is more than one match in a line, each
|
<b>-C</b> options are ignored. If there is more than one match in a line, each
|
||||||
of them is shown separately. If <b>-o</b> is combined with <b>-v</b> (invert the
|
of them is shown separately, on a separate line of output. If <b>-o</b> is
|
||||||
sense of the match to find non-matching lines), no output is generated, but the
|
combined with <b>-v</b> (invert the sense of the match to find non-matching
|
||||||
return code is set appropriately. If the matched portion of the line is empty,
|
lines), no output is generated, but the return code is set appropriately. If
|
||||||
nothing is output unless the file name or line number are being printed, in
|
the matched portion of the line is empty, nothing is output unless the file
|
||||||
which case they are shown on an otherwise empty line. This option is mutually
|
name or line number are being printed, in which case they are shown on an
|
||||||
exclusive with <b>--file-offsets</b> and <b>--line-offsets</b>.
|
otherwise empty line. This option is mutually exclusive with
|
||||||
|
<b>--file-offsets</b> and <b>--line-offsets</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
<b>-o</b><i>number</i>, <b>--only-matching</b>=<i>number</i>
|
||||||
|
@ -604,10 +619,11 @@ capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
match, nothing is output unless the file name or line number are being output.
|
match, nothing is output unless the file name or line number are being output.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
If this option is given multiple times, multiple substrings are output, in the
|
If this option is given multiple times, multiple substrings are output for each
|
||||||
order the options are given. For example, -o3 -o1 -o3 causes the substrings
|
match, in the order the options are given, and all on one line. For example,
|
||||||
matched by capturing parentheses 3 and 1 and then 3 again to be output. By
|
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||||
default, there is no separator (but see the next option).
|
then 3 again to be output. By default, there is no separator (but see the next
|
||||||
|
option).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--om-separator</b>=<i>text</i>
|
<b>--om-separator</b>=<i>text</i>
|
||||||
|
@ -638,6 +654,18 @@ quietly skipped. However, the return code is still 2, even if matches were
|
||||||
found in other files.
|
found in other files.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-t</b>, <b>--total-count</b>
|
||||||
|
This option is useful when scanning more than one file. If used on its own,
|
||||||
|
<b>-t</b> suppresses all output except for a grand total number of matching
|
||||||
|
lines (or non-matching lines if <b>-v</b> is used) in all the files. If <b>-t</b>
|
||||||
|
is used with <b>-c</b>, a grand total is output except when the previous output
|
||||||
|
is just one line. In other words, it is not output when just one file's count
|
||||||
|
is listed. If file names are being output, the grand total is preceded by
|
||||||
|
"TOTAL:". Otherwise, it appears as just another number. The <b>-t</b> option is
|
||||||
|
ignored when used with <b>-L</b> (list files without matches), because the grand
|
||||||
|
total would always be zero.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-u</b>, <b>--utf-8</b>
|
<b>-u</b>, <b>--utf-8</b>
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
Operate in UTF-8 mode. This option is available only if PCRE2 has been compiled
|
||||||
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
with UTF-8 support. All patterns (including those for any <b>--exclude</b> and
|
||||||
|
@ -665,11 +693,12 @@ specified by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||||
<P>
|
<P>
|
||||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||||
Force the patterns to be anchored (each must start matching at the beginning of
|
Force the patterns to be anchored (each must start matching at the beginning of
|
||||||
a line) and in addition, require them to match entire lines. This is equivalent
|
a line) and in addition, require them to match entire lines. In multiline mode
|
||||||
to having ^ and $ characters at the start and end of each alternative top-level
|
the match may be more than one line. This is equivalent to having \A and \Z
|
||||||
branch in every pattern. This option applies only to the patterns that are
|
characters at the start and end of each alternative top-level branch in every
|
||||||
matched against the contents of files; it does not apply to patterns specified
|
pattern. This option applies only to the patterns that are matched against the
|
||||||
by any of the <b>--include</b> or <b>--exclude</b> options.
|
contents of files; it does not apply to patterns specified by any of the
|
||||||
|
<b>--include</b> or <b>--exclude</b> options.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
<br><a name="SEC6" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -831,7 +860,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 19 June 2016
|
Last updated: 31 October 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -61,14 +61,10 @@ The maximum length of a lookbehind assertion is 65535 characters.
|
||||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
order to limit the amount of system stack used at compile time. The limit can
|
order to limit the amount of system stack used at compile time. The default
|
||||||
be specified when PCRE2 is built; the default is 250.
|
limit can be specified when PCRE2 is built; if it is not, the default is 250. An
|
||||||
</P>
|
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
||||||
<P>
|
set the limit in a compile context.
|
||||||
There is a limit to the number of forward references to subsequent subpatterns
|
|
||||||
of around 200,000. Repeated forward references with fixed upper limits, for
|
|
||||||
example, (?2){0,100} when subpattern number 2 is to the right, are included in
|
|
||||||
the count. There is no limit to the number of backward references.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of name for a named subpattern is 32 code units, and the
|
The maximum length of name for a named subpattern is 32 code units, and the
|
||||||
|
@ -76,7 +72,12 @@ maximum number of named subpatterns is 10000.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||||
|
32-bit libraries.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The maximum length of a string argument to a callout is the largest number a
|
||||||
|
32-bit unsigned integer can hold.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
@ -93,9 +94,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 05 November 2015
|
Last updated: 26 October 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -379,32 +379,31 @@ case letter, it is converted to upper case. Then bit 6 of the character (hex
|
||||||
40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A),
|
40) is inverted. Thus \cA to \cZ become hex 01 to hex 1A (A is 41, Z is 5A),
|
||||||
but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If the
|
but \c{ becomes hex 3B ({ is 7B), and \c; becomes hex 7B (; is 3B). If the
|
||||||
code unit following \c has a value less than 32 or greater than 126, a
|
code unit following \c has a value less than 32 or greater than 126, a
|
||||||
compile-time error occurs. This locks out non-printable ASCII characters in all
|
compile-time error occurs.
|
||||||
modes.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t
|
When PCRE2 is compiled in EBCDIC mode, \a, \e, \f, \n, \r, and \t
|
||||||
generate the appropriate EBCDIC code values. The \c escape is processed
|
generate the appropriate EBCDIC code values. The \c escape is processed
|
||||||
as specified for Perl in the <b>perlebcdic</b> document. The only characters
|
as specified for Perl in the <b>perlebcdic</b> document. The only characters
|
||||||
that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
|
that are allowed after \c are A-Z, a-z, or one of @, [, \, ], ^, _, or ?. Any
|
||||||
other character provokes a compile-time error. The sequence \@ encodes
|
other character provokes a compile-time error. The sequence \c@ encodes
|
||||||
character code 0; the letters (in either case) encode characters 1-26 (hex 01
|
character code 0; after \c the letters (in either case) encode characters 1-26
|
||||||
to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex 1F), and
|
(hex 01 to hex 1A); [, \, ], ^, and _ encode characters 27-31 (hex 1B to hex
|
||||||
\? becomes either 255 (hex FF) or 95 (hex 5F).
|
1F), and \c? becomes either 255 (hex FF) or 95 (hex 5F).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Thus, apart from \?, these escapes generate the same character code values as
|
Thus, apart from \c?, these escapes generate the same character code values as
|
||||||
they do in an ASCII environment, though the meanings of the values mostly
|
they do in an ASCII environment, though the meanings of the values mostly
|
||||||
differ. For example, \G always generates code value 7, which is BEL in ASCII
|
differ. For example, \cG always generates code value 7, which is BEL in ASCII
|
||||||
but DEL in EBCDIC.
|
but DEL in EBCDIC.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The sequence \? generates DEL (127, hex 7F) in an ASCII environment, but
|
The sequence \c? generates DEL (127, hex 7F) in an ASCII environment, but
|
||||||
because 127 is not a control character in EBCDIC, Perl makes it generate the
|
because 127 is not a control character in EBCDIC, Perl makes it generate the
|
||||||
APC character. Unfortunately, there are several variants of EBCDIC. In most of
|
APC character. Unfortunately, there are several variants of EBCDIC. In most of
|
||||||
them the APC character has the value 255 (hex FF), but in the one Perl calls
|
them the APC character has the value 255 (hex FF), but in the one Perl calls
|
||||||
POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
|
POSIX-BC its value is 95 (hex 5F). If certain other characters have POSIX-BC
|
||||||
values, PCRE2 makes \? generate 95; otherwise it generates 255.
|
values, PCRE2 makes \c? generate 95; otherwise it generates 255.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
After \0 up to two further octal digits are read. If there are fewer than two
|
After \0 up to two further octal digits are read. If there are fewer than two
|
||||||
|
@ -526,9 +525,9 @@ by code point, as described in the previous section.
|
||||||
Absolute and relative back references
|
Absolute and relative back references
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The sequence \g followed by an unsigned or a negative number, optionally
|
The sequence \g followed by a signed or unsigned number, optionally enclosed
|
||||||
enclosed in braces, is an absolute or relative back reference. A named back
|
in braces, is an absolute or relative back reference. A named back reference
|
||||||
reference can be coded as \g{name}. Back references are discussed
|
can be coded as \g{name}. Back references are discussed
|
||||||
<a href="#backreferences">later,</a>
|
<a href="#backreferences">later,</a>
|
||||||
following the discussion of
|
following the discussion of
|
||||||
<a href="#subpattern">parenthesized subpatterns.</a>
|
<a href="#subpattern">parenthesized subpatterns.</a>
|
||||||
|
@ -1326,13 +1325,32 @@ whatever setting of the PCRE2_DOTALL and PCRE2_MULTILINE options is used. A
|
||||||
class such as [^a] always matches one of these characters.
|
class such as [^a] always matches one of these characters.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,
|
||||||
|
\V, \w, and \W may appear in a character class, and add the characters that
|
||||||
|
they match to the class. For example, [\dABCDEF] matches any hexadecimal
|
||||||
|
digit. In UTF modes, the PCRE2_UCP option affects the meanings of \d, \s, \w
|
||||||
|
and their upper case partners, just as it does when they appear outside a
|
||||||
|
character class, as described in the section entitled
|
||||||
|
<a href="#genericchartypes">"Generic character types"</a>
|
||||||
|
above. The escape sequence \b has a different meaning inside a character
|
||||||
|
class; it matches the backspace character. The sequences \B, \N, \R, and \X
|
||||||
|
are not special inside a character class. Like any other unrecognized escape
|
||||||
|
sequences, they cause an error.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
The minus (hyphen) character can be used to specify a range of characters in a
|
The minus (hyphen) character can be used to specify a range of characters in a
|
||||||
character class. For example, [d-m] matches any letter between d and m,
|
character class. For example, [d-m] matches any letter between d and m,
|
||||||
inclusive. If a minus character is required in a class, it must be escaped with
|
inclusive. If a minus character is required in a class, it must be escaped with
|
||||||
a backslash or appear in a position where it cannot be interpreted as
|
a backslash or appear in a position where it cannot be interpreted as
|
||||||
indicating a range, typically as the first or last character in the class, or
|
indicating a range, typically as the first or last character in the class,
|
||||||
immediately after a range. For example, [b-d-z] matches letters in the range b
|
or immediately after a range. For example, [b-d-z] matches letters in the range
|
||||||
to d, a hyphen character, or z.
|
b to d, a hyphen character, or z.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
||||||
|
below) or a character type escape such as \d, but gives a warning in its
|
||||||
|
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
||||||
|
warning, an error is given in these cases.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
|
@ -1344,12 +1362,6 @@ followed by two other characters. The octal or hexadecimal representation of
|
||||||
"]" can also be used to end a range.
|
"]" can also be used to end a range.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
An error is generated if a POSIX character class (see below) or an escape
|
|
||||||
sequence other than one that defines a single character appears at a point
|
|
||||||
where a range ending character is expected. For example, [z-\xff] is valid,
|
|
||||||
but [A-\d] and [A-[:digit:]] are not.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
Ranges normally include all code points between the start and end characters,
|
Ranges normally include all code points between the start and end characters,
|
||||||
inclusive. They can also be used for code points specified numerically, for
|
inclusive. They can also be used for code points specified numerically, for
|
||||||
example [\000-\037]. Ranges can include any characters that are valid for the
|
example [\000-\037]. Ranges can include any characters that are valid for the
|
||||||
|
@ -1372,19 +1384,6 @@ tables for a French locale are in use, [\xc8-\xcb] matches accented E
|
||||||
characters in both cases.
|
characters in both cases.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The character escape sequences \d, \D, \h, \H, \p, \P, \s, \S, \v,
|
|
||||||
\V, \w, and \W may appear in a character class, and add the characters that
|
|
||||||
they match to the class. For example, [\dABCDEF] matches any hexadecimal
|
|
||||||
digit. In UTF modes, the PCRE2_UCP option affects the meanings of \d, \s, \w
|
|
||||||
and their upper case partners, just as it does when they appear outside a
|
|
||||||
character class, as described in the section entitled
|
|
||||||
<a href="#genericchartypes">"Generic character types"</a>
|
|
||||||
above. The escape sequence \b has a different meaning inside a character
|
|
||||||
class; it matches the backspace character. The sequences \B, \N, \R, and \X
|
|
||||||
are not special inside a character class. Like any other unrecognized escape
|
|
||||||
sequences, they cause an error.
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
A circumflex can conveniently be used with the upper case character types to
|
A circumflex can conveniently be used with the upper case character types to
|
||||||
specify a more restricted set of characters than the matching lower case type.
|
specify a more restricted set of characters than the matching lower case type.
|
||||||
For example, the class [^\W_] matches any letter or digit, but not underscore,
|
For example, the class [^\W_] matches any letter or digit, but not underscore,
|
||||||
|
@ -1552,13 +1551,8 @@ respectively.
|
||||||
<P>
|
<P>
|
||||||
When one of these option changes occurs at top level (that is, not inside
|
When one of these option changes occurs at top level (that is, not inside
|
||||||
subpattern parentheses), the change applies to the remainder of the pattern
|
subpattern parentheses), the change applies to the remainder of the pattern
|
||||||
that follows. If the change is placed right at the start of a pattern, PCRE2
|
that follows. An option change within a subpattern (see below for a description
|
||||||
extracts it into the global options (and it will therefore show up in data
|
of subpatterns) affects only that part of the subpattern that follows it, so
|
||||||
extracted by the <b>pcre2_pattern_info()</b> function).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
An option change within a subpattern (see below for a description of
|
|
||||||
subpatterns) affects only that part of the subpattern that follows it, so
|
|
||||||
<pre>
|
<pre>
|
||||||
(a(?i)b)c
|
(a(?i)b)c
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2093,9 +2087,9 @@ subpattern is possible using named parentheses (see below).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Another way of avoiding the ambiguity inherent in the use of digits following a
|
Another way of avoiding the ambiguity inherent in the use of digits following a
|
||||||
backslash is to use the \g escape sequence. This escape must be followed by an
|
backslash is to use the \g escape sequence. This escape must be followed by a
|
||||||
unsigned number or a negative number, optionally enclosed in braces. These
|
signed or unsigned number, optionally enclosed in braces. These examples are
|
||||||
examples are all identical:
|
all identical:
|
||||||
<pre>
|
<pre>
|
||||||
(ring), \1
|
(ring), \1
|
||||||
(ring), \g1
|
(ring), \g1
|
||||||
|
@ -2103,8 +2097,7 @@ examples are all identical:
|
||||||
</pre>
|
</pre>
|
||||||
An unsigned number specifies an absolute reference without the ambiguity that
|
An unsigned number specifies an absolute reference without the ambiguity that
|
||||||
is present in the older syntax. It is also useful when literal digits follow
|
is present in the older syntax. It is also useful when literal digits follow
|
||||||
the reference. A negative number is a relative reference. Consider this
|
the reference. A signed number is a relative reference. Consider this example:
|
||||||
example:
|
|
||||||
<pre>
|
<pre>
|
||||||
(abc(def)ghi)\g{-1}
|
(abc(def)ghi)\g{-1}
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2115,6 +2108,11 @@ can be helpful in long patterns, and also in patterns that are created by
|
||||||
joining together fragments that contain references within themselves.
|
joining together fragments that contain references within themselves.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The sequence \g{+1} is a reference to the next capturing subpattern. This kind
|
||||||
|
of forward reference can be useful in patterns that repeat. Perl does not
|
||||||
|
support the use of + in this way.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
A back reference matches whatever actually matched the capturing subpattern in
|
A back reference matches whatever actually matched the capturing subpattern in
|
||||||
the current subject string, rather than anything matching the subpattern
|
the current subject string, rather than anything matching the subpattern
|
||||||
itself (see
|
itself (see
|
||||||
|
@ -2214,6 +2212,14 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not
|
||||||
always, does do capturing in negative assertions.)
|
always, does do capturing in negative assertions.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
WARNING: If a positive assertion containing one or more capturing subpatterns
|
||||||
|
succeeds, but failure to match later in the pattern causes backtracking over
|
||||||
|
this assertion, the captures within the assertion are reset only if no higher
|
||||||
|
numbered captures are already set. This is, unfortunately, a fundamental
|
||||||
|
limitation of the current implementation; it may get removed in a future
|
||||||
|
reworking.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
For compatibility with Perl, most assertion subpatterns may be repeated; though
|
For compatibility with Perl, most assertion subpatterns may be repeated; though
|
||||||
it makes no sense to assert the same thing several times, the side effect of
|
it makes no sense to assert the same thing several times, the side effect of
|
||||||
capturing parentheses may occasionally be useful. However, an assertion that
|
capturing parentheses may occasionally be useful. However, an assertion that
|
||||||
|
@ -2310,18 +2316,31 @@ match. If there are insufficient characters before the current position, the
|
||||||
assertion fails.
|
assertion fails.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In a UTF mode, PCRE2 does not allow the \C escape (which matches a single code
|
In UTF-8 and UTF-16 modes, PCRE2 does not allow the \C escape (which matches a
|
||||||
unit even in a UTF mode) to appear in lookbehind assertions, because it makes
|
single code unit even in a UTF mode) to appear in lookbehind assertions,
|
||||||
it impossible to calculate the length of the lookbehind. The \X and \R
|
because it makes it impossible to calculate the length of the lookbehind. The
|
||||||
escapes, which can match different numbers of code units, are also not
|
\X and \R escapes, which can match different numbers of code units, are never
|
||||||
permitted.
|
permitted in lookbehinds.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<a href="#subpatternsassubroutines">"Subroutine"</a>
|
<a href="#subpatternsassubroutines">"Subroutine"</a>
|
||||||
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
|
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
|
||||||
as the subpattern matches a fixed-length string.
|
as the subpattern matches a fixed-length string. However,
|
||||||
<a href="#recursion">Recursion,</a>
|
<a href="#recursion">recursion,</a>
|
||||||
however, is not supported.
|
that is, a "subroutine" call into a group that is already active,
|
||||||
|
is not supported.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Perl does not support back references in lookbehinds. PCRE2 does support them,
|
||||||
|
but only if certain conditions are met. The PCRE2_MATCH_UNSET_BACKREF option
|
||||||
|
must not be set, there must be no use of (?| in the pattern (it creates
|
||||||
|
duplicate subpattern numbers), and if the back reference is by name, the name
|
||||||
|
must be unique. Of course, the referenced subpattern must itself be of fixed
|
||||||
|
length. The following pattern matches words containing at least two characters
|
||||||
|
that begin and end with the same character:
|
||||||
|
<pre>
|
||||||
|
\b(\w)\w++(?<=\1)
|
||||||
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Possessive quantifiers can be used in conjunction with lookbehind assertions to
|
Possessive quantifiers can be used in conjunction with lookbehind assertions to
|
||||||
|
@ -2459,7 +2478,9 @@ Checking for a used subpattern by name
|
||||||
<P>
|
<P>
|
||||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||||
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
||||||
this facility before Perl, the syntax (?(name)...) is also recognized.
|
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
||||||
|
however, that undelimited names consisting of the letter R followed by digits
|
||||||
|
are ambiguous (see the following section).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Rewriting the above example to use a named subpattern gives this:
|
Rewriting the above example to use a named subpattern gives this:
|
||||||
|
@ -2474,30 +2495,52 @@ matched.
|
||||||
Checking for pattern recursion
|
Checking for pattern recursion
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
If the condition is the string (R), and there is no subpattern with the name R,
|
"Recursion" in this sense refers to any subroutine-like call from one part of
|
||||||
the condition is true if a recursive call to the whole pattern or any
|
the pattern to another, whether or not it is actually recursive. See the
|
||||||
subpattern has been made. If digits or a name preceded by ampersand follow the
|
sections entitled
|
||||||
letter R, for example:
|
<a href="#recursion">"Recursive patterns"</a>
|
||||||
|
and
|
||||||
|
<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
|
||||||
|
below for details of recursion and subpattern calls.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If a condition is the string (R), and there is no subpattern with the name R,
|
||||||
|
the condition is true if matching is currently in a recursion or subroutine
|
||||||
|
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
||||||
|
there is no subpattern with that name, the condition is true if the most recent
|
||||||
|
call is into a subpattern with the given number, which must exist somewhere in
|
||||||
|
the overall pattern. This is a contrived example that is equivalent to a+b:
|
||||||
<pre>
|
<pre>
|
||||||
(?(R3)...) or (?(R&name)...)
|
((?(R1)a+|(?1)b))
|
||||||
</pre>
|
</pre>
|
||||||
the condition is true if the most recent recursion is into a subpattern whose
|
However, in both cases, if there is a subpattern with a matching name, the
|
||||||
number or name is given. This condition does not check the entire recursion
|
condition tests for its being set, as described in the section above, instead
|
||||||
stack. If the name used in a condition of this kind is a duplicate, the test is
|
of testing for recursion. For example, creating a group with the name R1 by
|
||||||
applied to all subpatterns of the same name, and is true if any one of them is
|
adding (?<R1>) to the above pattern completely changes its meaning.
|
||||||
the most recent recursion.
|
</P>
|
||||||
|
<P>
|
||||||
|
If a name preceded by ampersand follows the letter R, for example:
|
||||||
|
<pre>
|
||||||
|
(?(R&name)...)
|
||||||
|
</pre>
|
||||||
|
the condition is true if the most recent recursion is into a subpattern of that
|
||||||
|
name (which must exist within the pattern).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
This condition does not check the entire recursion stack. It tests only the
|
||||||
|
current level. If the name used in a condition of this kind is a duplicate, the
|
||||||
|
test is applied to all subpatterns of the same name, and is true if any one of
|
||||||
|
them is the most recent recursion.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
At "top level", all these recursion test conditions are false.
|
At "top level", all these recursion test conditions are false.
|
||||||
<a href="#recursion">The syntax for recursive patterns</a>
|
|
||||||
is described below.
|
|
||||||
<a name="subdefine"></a></P>
|
<a name="subdefine"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Defining subpatterns for use by reference only
|
Defining subpatterns for use by reference only
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
If the condition is the string (DEFINE), and there is no subpattern with the
|
If the condition is the string (DEFINE), the condition is always false, even if
|
||||||
name DEFINE, the condition is always false. In this case, there may be only one
|
there is a group with the name DEFINE. In this case, there may be only one
|
||||||
alternative in the subpattern. It is always skipped if control reaches this
|
alternative in the subpattern. It is always skipped if control reaches this
|
||||||
point in the pattern; the idea of DEFINE is that it can be used to define
|
point in the pattern; the idea of DEFINE is that it can be used to define
|
||||||
subroutines that can be referenced from elsewhere. (The use of
|
subroutines that can be referenced from elsewhere. (The use of
|
||||||
|
@ -2965,12 +3008,22 @@ depending on whether or not a name is present.
|
||||||
By default, for compatibility with Perl, a name is any sequence of characters
|
By default, for compatibility with Perl, a name is any sequence of characters
|
||||||
that does not include a closing parenthesis. The name is not processed in
|
that does not include a closing parenthesis. The name is not processed in
|
||||||
any way, and it is not possible to include a closing parenthesis in the name.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
However, if the PCRE2_ALT_VERBNAMES option is set, normal backslash processing
|
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
||||||
is applied to verb names and only an unescaped closing parenthesis terminates
|
is no longer Perl-compatible.
|
||||||
the name. A closing parenthesis can be included in a name either as \) or
|
</P>
|
||||||
between \Q and \E. If the PCRE2_EXTENDED option is set, unescaped whitespace
|
<P>
|
||||||
in verb names is skipped and #-comments are recognized, exactly as in the rest
|
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
||||||
of the pattern.
|
and only an unescaped closing parenthesis terminates the name. However, the
|
||||||
|
only backslash items that are permitted are \Q, \E, and sequences such as
|
||||||
|
\x{100} that define character code points. Character type escapes such as \d
|
||||||
|
are faulted.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A closing parenthesis can be included in a name either as \) or between \Q
|
||||||
|
and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
||||||
|
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
||||||
|
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
||||||
|
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||||
|
@ -3393,7 +3446,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 20 June 2016
|
Last updated: 23 October 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -492,6 +492,9 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
\n reference by number (can be ambiguous)
|
\n reference by number (can be ambiguous)
|
||||||
\gn reference by number
|
\gn reference by number
|
||||||
\g{n} reference by number
|
\g{n} reference by number
|
||||||
|
\g+n relative reference by number (PCRE2 extension)
|
||||||
|
\g-n relative reference by number
|
||||||
|
\g{+n} relative reference by number (PCRE2 extension)
|
||||||
\g{-n} relative reference by number
|
\g{-n} relative reference by number
|
||||||
\k<name> reference by name (Perl)
|
\k<name> reference by name (Perl)
|
||||||
\k'name' reference by name (Perl)
|
\k'name' reference by name (Perl)
|
||||||
|
@ -530,14 +533,17 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
(?(-n) relative reference condition
|
(?(-n) relative reference condition
|
||||||
(?(<name>) named reference condition (Perl)
|
(?(<name>) named reference condition (Perl)
|
||||||
(?('name') named reference condition (Perl)
|
(?('name') named reference condition (Perl)
|
||||||
(?(name) named reference condition (PCRE2)
|
(?(name) named reference condition (PCRE2, deprecated)
|
||||||
(?(R) overall recursion condition
|
(?(R) overall recursion condition
|
||||||
(?(Rn) specific group recursion condition
|
(?(Rn) specific numbered group recursion condition
|
||||||
(?(R&name) specific recursion condition
|
(?(R&name) specific named group recursion condition
|
||||||
(?(DEFINE) define subpattern for reference
|
(?(DEFINE) define subpattern for reference
|
||||||
(?(VERSION[>]=n.m) test PCRE2 version
|
(?(VERSION[>]=n.m) test PCRE2 version
|
||||||
(?(assert) assertion condition
|
(?(assert) assertion condition
|
||||||
</PRE>
|
</pre>
|
||||||
|
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||||
|
conditions or recursion tests. Such a condition is interpreted as a reference
|
||||||
|
condition if the relevant named group exists.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
<br><a name="SEC23" href="#TOC1">BACKTRACKING CONTROL</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -589,9 +595,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 16 October 2015
|
Last updated: 28 September 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2015 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -615,6 +615,7 @@ about the pattern:
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
</pre>
|
</pre>
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
|
@ -698,6 +699,18 @@ testing that <b>pcre2_compile()</b> behaves correctly in this case (it uses
|
||||||
default values).
|
default values).
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Specifying the pattern's length
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
By default, patterns are passed to the compiling functions as zero-terminated
|
||||||
|
strings. When using the POSIX wrapper API, there is no other option. However,
|
||||||
|
when using PCRE2's native API, patterns can be passed by length instead of
|
||||||
|
being zero-terminated. The <b>use_length</b> modifier causes this to happen.
|
||||||
|
Using a length happens automatically (whether or not <b>use_length</b> is set)
|
||||||
|
when <b>hex</b> is set, because patterns specified in hexadecimal may contain
|
||||||
|
binary zeros.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Specifying pattern characters in hexadecimal
|
Specifying pattern characters in hexadecimal
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -720,10 +733,10 @@ the delimiter within a substring. The <b>hex</b> and <b>expand</b> modifiers are
|
||||||
mutually exclusive.
|
mutually exclusive.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, <b>pcre2test</b> passes patterns as zero-terminated strings to
|
The POSIX API cannot be used with patterns specified in hexadecimal because
|
||||||
<b>pcre2_compile()</b>, giving the length as PCRE2_ZERO_TERMINATED. However, for
|
they may contain binary zeros, which conflicts with <b>regcomp()</b>'s
|
||||||
patterns specified with the <b>hex</b> modifier, the actual length of the
|
requirement for a zero-terminated string. Such patterns are always passed to
|
||||||
pattern is passed.
|
<b>pcre2_compile()</b> as a string with a length, not as zero-terminated.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Specifying wide characters in 16-bit and 32-bit modes
|
Specifying wide characters in 16-bit and 32-bit modes
|
||||||
|
@ -1753,7 +1766,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 02 August 2016
|
Last updated: 04 November 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -94,6 +94,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
<tr><td><a href="pcre2_code_copy.html">pcre2_code_copy</a></td>
|
||||||
<td> Copy a compiled pattern</td></tr>
|
<td> Copy a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_code_copy_with_tables.html">pcre2_code_copy_with_tables</a></td>
|
||||||
|
<td> Copy a compiled pattern and its character tables</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
<tr><td><a href="pcre2_code_free.html">pcre2_code_free</a></td>
|
||||||
<td> Free a compiled pattern</td></tr>
|
<td> Free a compiled pattern</td></tr>
|
||||||
|
|
||||||
|
|
1425
doc/pcre2.txt
1425
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_CODE_COPY 3 "26 February 2016" "PCRE2 10.22"
|
.TH PCRE2_CODE_COPY 3 "22 November 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -16,8 +16,9 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
any memory used by the JIT compiler. Without a subsequent call to
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The
|
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching. The
|
||||||
yield of the function is NULL if \fIcode\fP is NULL or if sufficient memory
|
pointer to the character tables is copied, not the tables themselves (see
|
||||||
cannot be obtained.
|
\fBpcre2_code_copy_with_tables()\fP). The yield of the function is NULL if
|
||||||
|
\fIcode\fP is NULL or if sufficient memory cannot be obtained.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
.TH PCRE2_CODE_COPY_WITH_TABLES 3 "22 November 2016" "PCRE2 10.23"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
|
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching.
|
||||||
|
Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also
|
||||||
|
made, with the new code pointing to it. This memory will be automatically freed
|
||||||
|
when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if
|
||||||
|
\fIcode\fP is NULL or if sufficient memory cannot be obtained.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "30 September 2016" "PCRE2 10.23"
|
.TH PCRE2API 3 "22 November 2016" "PCRE2 10.23"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -235,6 +235,8 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.nf
|
.nf
|
||||||
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
||||||
.sp
|
.sp
|
||||||
|
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||||
|
.sp
|
||||||
.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP,
|
.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP,
|
||||||
.B " PCRE2_SIZE \fIbufflen\fP);"
|
.B " PCRE2_SIZE \fIbufflen\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
@ -509,8 +511,9 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
||||||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||||
required. JIT compilation updates a pointer within the compiled code block, so
|
required. JIT compilation updates a pointer within the compiled code block, so
|
||||||
a thread must gain unique write access to the pointer before calling
|
a thread must gain unique write access to the pointer before calling
|
||||||
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP can be used
|
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or
|
||||||
to obtain a private copy of the compiled code.
|
\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the
|
||||||
|
compiled code.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Context blocks"
|
.SS "Context blocks"
|
||||||
|
@ -1027,6 +1030,8 @@ zero.
|
||||||
.B void pcre2_code_free(pcre2_code *\fIcode\fP);
|
.B void pcre2_code_free(pcre2_code *\fIcode\fP);
|
||||||
.sp
|
.sp
|
||||||
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
.B pcre2_code *pcre2_code_copy(const pcre2_code *\fIcode\fP);
|
||||||
|
.sp
|
||||||
|
.B pcre2_code *pcre2_code_copy_with_tables(const pcre2_code *\fIcode\fP);
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
|
The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
|
||||||
|
@ -1049,9 +1054,24 @@ below),
|
||||||
.\"
|
.\"
|
||||||
the JIT information cannot be copied (because it is position-dependent).
|
the JIT information cannot be copied (because it is position-dependent).
|
||||||
The new copy can initially be used only for non-JIT matching, though it can be
|
The new copy can initially be used only for non-JIT matching, though it can be
|
||||||
passed to \fBpcre2_jit_compile()\fP if required. The \fBpcre2_code_copy()\fP
|
passed to \fBpcre2_jit_compile()\fP if required.
|
||||||
function provides a way for individual threads in a multithreaded application
|
.P
|
||||||
to acquire a private copy of shared compiled code.
|
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
|
||||||
|
multithreaded application to acquire a private copy of shared compiled code.
|
||||||
|
However, it does not make a copy of the character tables used by the compiled
|
||||||
|
pattern; the new pattern code points to the same tables as the original code.
|
||||||
|
(See
|
||||||
|
.\" HTML <a href="#localesupport">
|
||||||
|
.\" </a>
|
||||||
|
"Locale Support"
|
||||||
|
.\"
|
||||||
|
below for details of these character tables.) In many applications the same
|
||||||
|
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||||
|
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||||
|
are needed. The \fBpcre2_code_copy_with_tables()\fP function provides this facility.
|
||||||
|
Copies of both the code and the tables are made, with the new code pointing to
|
||||||
|
the new tables. The memory for the new tables is automatically freed when
|
||||||
|
\fBpcre2_code_free()\fP is called for the new copy of the compiled code.
|
||||||
.P
|
.P
|
||||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||||
pattern and the subject string are set in the match data block so that they can
|
pattern and the subject string are set in the match data block so that they can
|
||||||
|
@ -3299,6 +3319,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 30 September 2016
|
Last updated: 22 November 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -51,103 +51,115 @@ DESCRIPTION
|
||||||
boundary is controlled by the -N (--newline) option.
|
boundary is controlled by the -N (--newline) option.
|
||||||
|
|
||||||
The amount of memory used for buffering files that are being scanned is
|
The amount of memory used for buffering files that are being scanned is
|
||||||
controlled by a parameter that can be set by the --buffer-size option.
|
controlled by parameters that can be set by the --buffer-size and
|
||||||
The default value for this parameter is specified when pcre2grep is
|
--max-buffer-size options. The first of these sets the size of buffer
|
||||||
built, with the default default being 20K. A block of memory three
|
that is obtained at the start of processing. If an input file contains
|
||||||
times this size is used (to allow for buffering "before" and "after"
|
very long lines, a larger buffer may be needed; this is handled by
|
||||||
lines). An error occurs if a line overflows the buffer.
|
automatically extending the buffer, up to the limit specified by --max-
|
||||||
|
buffer-size. The default values for these parameters are specified when
|
||||||
|
pcre2grep is built, with the default defaults being 20K and 1M respec-
|
||||||
|
tively. An error occurs if a line is too long and the buffer can no
|
||||||
|
longer be expanded.
|
||||||
|
|
||||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the
|
The block of memory that is actually used is three times the "buffer
|
||||||
greater. BUFSIZ is defined in <stdio.h>. When there is more than one
|
size", to allow for buffering "before" and "after" lines. If the buffer
|
||||||
|
size is too small, fewer than requested "before" and "after" lines may
|
||||||
|
be output.
|
||||||
|
|
||||||
|
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the
|
||||||
|
greater. BUFSIZ is defined in <stdio.h>. When there is more than one
|
||||||
pattern (specified by the use of -e and/or -f), each pattern is applied
|
pattern (specified by the use of -e and/or -f), each pattern is applied
|
||||||
to each line in the order in which they are defined, except that all
|
to each line in the order in which they are defined, except that all
|
||||||
the -e patterns are tried before the -f patterns.
|
the -e patterns are tried before the -f patterns.
|
||||||
|
|
||||||
By default, as soon as one pattern matches a line, no further patterns
|
By default, as soon as one pattern matches a line, no further patterns
|
||||||
are considered. However, if --colour (or --color) is used to colour the
|
are considered. However, if --colour (or --color) is used to colour the
|
||||||
matching substrings, or if --only-matching, --file-offsets, or --line-
|
matching substrings, or if --only-matching, --file-offsets, or --line-
|
||||||
offsets is used to output only the part of the line that matched
|
offsets is used to output only the part of the line that matched
|
||||||
(either shown literally, or as an offset), scanning resumes immediately
|
(either shown literally, or as an offset), scanning resumes immediately
|
||||||
following the match, so that further matches on the same line can be
|
following the match, so that further matches on the same line can be
|
||||||
found. If there are multiple patterns, they are all tried on the
|
found. If there are multiple patterns, they are all tried on the
|
||||||
remainder of the line, but patterns that follow the one that matched
|
remainder of the line, but patterns that follow the one that matched
|
||||||
are not tried on the earlier part of the line.
|
are not tried on the earlier part of the line.
|
||||||
|
|
||||||
This behaviour means that the order in which multiple patterns are
|
This behaviour means that the order in which multiple patterns are
|
||||||
specified can affect the output when one of the above options is used.
|
specified can affect the output when one of the above options is used.
|
||||||
This is no longer the same behaviour as GNU grep, which now manages to
|
This is no longer the same behaviour as GNU grep, which now manages to
|
||||||
display earlier matches for later patterns (as long as there is no
|
display earlier matches for later patterns (as long as there is no
|
||||||
overlap).
|
overlap).
|
||||||
|
|
||||||
Patterns that can match an empty string are accepted, but empty string
|
Patterns that can match an empty string are accepted, but empty string
|
||||||
matches are never recognized. An example is the pattern
|
matches are never recognized. An example is the pattern
|
||||||
"(super)?(man)?", in which all components are optional. This pattern
|
"(super)?(man)?", in which all components are optional. This pattern
|
||||||
finds all occurrences of both "super" and "man"; the output differs
|
finds all occurrences of both "super" and "man"; the output differs
|
||||||
from matching with "super|man" when only the matching substrings are
|
from matching with "super|man" when only the matching substrings are
|
||||||
being shown.
|
being shown.
|
||||||
|
|
||||||
If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses
|
If the LC_ALL or LC_CTYPE environment variable is set, pcre2grep uses
|
||||||
the value to set a locale when calling the PCRE2 library. The --locale
|
the value to set a locale when calling the PCRE2 library. The --locale
|
||||||
option can be used to override this.
|
option can be used to override this.
|
||||||
|
|
||||||
|
|
||||||
SUPPORT FOR COMPRESSED FILES
|
SUPPORT FOR COMPRESSED FILES
|
||||||
|
|
||||||
It is possible to compile pcre2grep so that it uses libz or libbz2 to
|
It is possible to compile pcre2grep so that it uses libz or libbz2 to
|
||||||
read files whose names end in .gz or .bz2, respectively. You can find
|
read files whose names end in .gz or .bz2, respectively. You can find
|
||||||
out whether your binary has support for one or both of these file types
|
out whether your binary has support for one or both of these file types
|
||||||
by running it with the --help option. If the appropriate support is not
|
by running it with the --help option. If the appropriate support is not
|
||||||
present, files are treated as plain text. The standard input is always
|
present, files are treated as plain text. The standard input is always
|
||||||
so treated.
|
so treated.
|
||||||
|
|
||||||
|
|
||||||
BINARY FILES
|
BINARY FILES
|
||||||
|
|
||||||
By default, a file that contains a binary zero byte within the first
|
By default, a file that contains a binary zero byte within the first
|
||||||
1024 bytes is identified as a binary file, and is processed specially.
|
1024 bytes is identified as a binary file, and is processed specially.
|
||||||
(GNU grep also identifies binary files in this manner.) See the
|
(GNU grep also identifies binary files in this manner.) See the
|
||||||
--binary-files option for a means of changing the way binary files are
|
--binary-files option for a means of changing the way binary files are
|
||||||
handled.
|
handled.
|
||||||
|
|
||||||
|
|
||||||
OPTIONS
|
OPTIONS
|
||||||
|
|
||||||
The order in which some of the options appear can affect the output.
|
The order in which some of the options appear can affect the output.
|
||||||
For example, both the -h and -l options affect the printing of file
|
For example, both the -h and -l options affect the printing of file
|
||||||
names. Whichever comes later in the command line will be the one that
|
names. Whichever comes later in the command line will be the one that
|
||||||
takes effect. Similarly, except where noted below, if an option is
|
takes effect. Similarly, except where noted below, if an option is
|
||||||
given twice, the later setting is used. Numerical values for options
|
given twice, the later setting is used. Numerical values for options
|
||||||
may be followed by K or M, to signify multiplication by 1024 or
|
may be followed by K or M, to signify multiplication by 1024 or
|
||||||
1024*1024 respectively.
|
1024*1024 respectively.
|
||||||
|
|
||||||
-- This terminates the list of options. It is useful if the next
|
-- This terminates the list of options. It is useful if the next
|
||||||
item on the command line starts with a hyphen but is not an
|
item on the command line starts with a hyphen but is not an
|
||||||
option. This allows for the processing of patterns and file
|
option. This allows for the processing of patterns and file
|
||||||
names that start with hyphens.
|
names that start with hyphens.
|
||||||
|
|
||||||
-A number, --after-context=number
|
-A number, --after-context=number
|
||||||
Output number lines of context after each matching line. If
|
Output up to number lines of context after each matching
|
||||||
file names and/or line numbers are being output, a hyphen
|
line. Fewer lines are output if the next match or the end of
|
||||||
separator is used instead of a colon for the context lines. A
|
the file is reached, or if the processing buffer size has
|
||||||
line containing "--" is output between each group of lines,
|
been set too small. If file names and/or line numbers are
|
||||||
unless they are in fact contiguous in the input file. The
|
being output, a hyphen separator is used instead of a colon
|
||||||
value of number is expected to be relatively small. However,
|
for the context lines. A line containing "--" is output
|
||||||
pcre2grep guarantees to have up to 8K of following text
|
between each group of lines, unless they are in fact contigu-
|
||||||
available for context output.
|
ous in the input file. The value of number is expected to be
|
||||||
|
relatively small. When -c is used, -A is ignored.
|
||||||
|
|
||||||
-a, --text
|
-a, --text
|
||||||
Treat binary files as text. This is equivalent to --binary-
|
Treat binary files as text. This is equivalent to --binary-
|
||||||
files=text.
|
files=text.
|
||||||
|
|
||||||
-B number, --before-context=number
|
-B number, --before-context=number
|
||||||
Output number lines of context before each matching line. If
|
Output up to number lines of context before each matching
|
||||||
file names and/or line numbers are being output, a hyphen
|
line. Fewer lines are output if the previous match or the
|
||||||
separator is used instead of a colon for the context lines. A
|
start of the file is within number lines, or if the process-
|
||||||
line containing "--" is output between each group of lines,
|
ing buffer size has been set too small. If file names and/or
|
||||||
unless they are in fact contiguous in the input file. The
|
line numbers are being output, a hyphen separator is used
|
||||||
value of number is expected to be relatively small. However,
|
instead of a colon for the context lines. A line containing
|
||||||
pcre2grep guarantees to have up to 8K of preceding text
|
"--" is output between each group of lines, unless they are
|
||||||
available for context output.
|
in fact contiguous in the input file. The value of number is
|
||||||
|
expected to be relatively small. When -c is used, -B is
|
||||||
|
ignored.
|
||||||
|
|
||||||
--binary-files=word
|
--binary-files=word
|
||||||
Specify how binary files are to be processed. If the word is
|
Specify how binary files are to be processed. If the word is
|
||||||
|
@ -164,54 +176,58 @@ OPTIONS
|
||||||
any output or affecting the return code.
|
any output or affecting the return code.
|
||||||
|
|
||||||
--buffer-size=number
|
--buffer-size=number
|
||||||
Set the parameter that controls how much memory is used for
|
Set the parameter that controls how much memory is obtained
|
||||||
buffering files that are being scanned.
|
at the start of processing for buffering files that are being
|
||||||
|
scanned. See also --max-buffer-size below.
|
||||||
|
|
||||||
-C number, --context=number
|
-C number, --context=number
|
||||||
Output number lines of context both before and after each
|
Output number lines of context both before and after each
|
||||||
matching line. This is equivalent to setting both -A and -B
|
matching line. This is equivalent to setting both -A and -B
|
||||||
to the same value.
|
to the same value.
|
||||||
|
|
||||||
-c, --count
|
-c, --count
|
||||||
Do not output lines from the files that are being scanned;
|
Do not output lines from the files that are being scanned;
|
||||||
instead output the number of matches (or non-matches if -v is
|
instead output the number of lines that would have been
|
||||||
used) that would otherwise have caused lines to be shown. By
|
shown, either because they matched, or, if -v is set, because
|
||||||
default, this count is the same as the number of suppressed
|
they failed to match. By default, this count is exactly the
|
||||||
lines, but if the -M (multiline) option is used (without -v),
|
same as the number of lines that would have been output, but
|
||||||
there may be more suppressed lines than the number of
|
if the -M (multiline) option is used (without -v), there may
|
||||||
matches.
|
be more suppressed lines than the count (that is, the number
|
||||||
|
of matches).
|
||||||
|
|
||||||
If no lines are selected, the number zero is output. If sev-
|
If no lines are selected, the number zero is output. If sev-
|
||||||
eral files are being scanned, a count is output for each
|
eral files are being scanned, a count is output for each
|
||||||
of them. However, if the --files-with-matches option is also
|
of them and the -t option can be used to cause a total to be
|
||||||
used, only those files whose counts are greater than zero are
|
output at the end. However, if the --files-with-matches
|
||||||
listed. When -c is used, the -A, -B, and -C options are
|
option is also used, only those files whose counts are
|
||||||
ignored.
|
greater than zero are listed. When -c is used, the -A, -B,
|
||||||
|
and -C options are ignored.
|
||||||
|
|
||||||
--colour, --color
|
--colour, --color
|
||||||
If this option is given without any data, it is equivalent to
|
If this option is given without any data, it is equivalent to
|
||||||
"--colour=auto". If data is required, it must be given in
|
"--colour=auto". If data is required, it must be given in
|
||||||
the same shell item, separated by an equals sign.
|
the same shell item, separated by an equals sign.
|
||||||
|
|
||||||
--colour=value, --color=value
|
--colour=value, --color=value
|
||||||
This option specifies under what circumstances the parts of a
|
This option specifies under what circumstances the parts of a
|
||||||
line that matched a pattern should be coloured in the output.
|
line that matched a pattern should be coloured in the output.
|
||||||
By default, the output is not coloured. The value (which is
|
By default, the output is not coloured. The value (which is
|
||||||
optional, see above) may be "never", "always", or "auto". In
|
optional, see above) may be "never", "always", or "auto". In
|
||||||
the latter case, colouring happens only if the standard out-
|
the latter case, colouring happens only if the standard out-
|
||||||
put is connected to a terminal. More resources are used when
|
put is connected to a terminal. More resources are used when
|
||||||
colouring is enabled, because pcre2grep has to search for all
|
colouring is enabled, because pcre2grep has to search for all
|
||||||
possible matches in a line, not just one, in order to colour
|
possible matches in a line, not just one, in order to colour
|
||||||
them all.
|
them all.
|
||||||
|
|
||||||
The colour that is used can be specified by setting the envi-
|
The colour that is used can be specified by setting the envi-
|
||||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. The
|
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If nei-
|
||||||
value of this variable should be a string of two numbers,
|
ther of these is set, pcre2grep looks for GREP_COLOUR or
|
||||||
separated by a semicolon. They are copied directly into the
|
GREP_COLOR. The value of the variable should be a string of
|
||||||
control string for setting colour on a terminal, so it is
|
two numbers, separated by a semicolon. They are copied
|
||||||
your responsibility to ensure that they make sense. If nei-
|
directly into the control string for setting colour on a ter-
|
||||||
ther of the environment variables is set, the default is
|
minal, so it is your responsibility to ensure that they make
|
||||||
"1;31", which gives red.
|
sense. If neither of the environment variables is set, the
|
||||||
|
default is "1;31", which gives red.
|
||||||
|
|
||||||
-D action, --devices=action
|
-D action, --devices=action
|
||||||
If an input path is not a regular file or a directory,
|
If an input path is not a regular file or a directory,
|
||||||
|
@ -299,12 +315,12 @@ OPTIONS
|
||||||
Read patterns from the file, one per line, and match them
|
Read patterns from the file, one per line, and match them
|
||||||
against each line of input. What constitutes a newline when
|
against each line of input. What constitutes a newline when
|
||||||
reading the file is the operating system's default. The
|
reading the file is the operating system's default. The
|
||||||
--newline option has no effect on this option. Trailing white
|
--newline option has no effect on this option. Trailing
|
||||||
space is removed from each line, and blank lines are ignored.
|
white space is removed from each line, and blank lines are
|
||||||
An empty file contains no patterns and therefore matches
|
ignored. An empty file contains no patterns and therefore
|
||||||
nothing. See also the comments about multiple patterns versus
|
matches nothing. See also the comments about multiple pat-
|
||||||
a single pattern with alternatives in the description of -e
|
terns versus a single pattern with alternatives in the
|
||||||
above.
|
description of -e above.
|
||||||
|
|
||||||
If this option is given more than once, all the specified
|
If this option is given more than once, all the specified
|
||||||
files are read. A data line is output if any of the patterns
|
files are read. A data line is output if any of the patterns
|
||||||
|
@ -482,102 +498,101 @@ OPTIONS
|
||||||
tings are specified when the PCRE2 library is compiled, with
|
tings are specified when the PCRE2 library is compiled, with
|
||||||
the default build-time value being 10 million.
|
the default build-time value being 10 million.
|
||||||
|
|
||||||
|
--max-buffer-size=number
|
||||||
|
This limits the expansion of the processing buffer, whose
|
||||||
|
initial size can be set by --buffer-size. The maximum buffer
|
||||||
|
size is silently forced to be no smaller than the starting
|
||||||
|
buffer size.
|
||||||
|
|
||||||
-M, --multiline
|
-M, --multiline
|
||||||
Allow patterns to match more than one line. When this option
|
Allow patterns to match more than one line. When this option
|
||||||
is given, patterns may usefully contain literal newline char-
|
is set, the PCRE2 library is called in "multiline" mode. This
|
||||||
acters and internal occurrences of ^ and $ characters. The
|
allows a matched string to extend past the end of a line and
|
||||||
output for a successful match may consist of more than one
|
continue on one or more subsequent lines. Patterns used with
|
||||||
line. The first is the line in which the match started, and
|
-M may usefully contain literal newline characters and inter-
|
||||||
the last is the line in which the match ended. If the matched
|
nal occurrences of ^ and $ characters. The output for a suc-
|
||||||
string ends with a newline sequence the output ends at the
|
cessful match may consist of more than one line. The first
|
||||||
end of that line.
|
line is the line in which the match started, and the last
|
||||||
|
line is the line in which the match ended. If the matched
|
||||||
|
string ends with a newline sequence, the output ends at the
|
||||||
|
end of that line. If -v is set, none of the lines in a
|
||||||
|
multi-line match are output. Once a match has been handled,
|
||||||
|
scanning restarts at the beginning of the line after the one
|
||||||
|
in which the match ended.
|
||||||
|
|
||||||
When this option is set, the PCRE2 library is called in "mul-
|
The newline sequence that separates multiple lines must be
|
||||||
tiline" mode. This allows a matched string to extend past the
|
matched as part of the pattern. For example, to find the
|
||||||
end of a line and continue on one or more subsequent lines.
|
phrase "regular expression" in a file where "regular" might
|
||||||
However, pcre2grep still processes the input line by line.
|
be at the end of a line and "expression" at the start of the
|
||||||
Once a match has been handled, scanning restarts at the
|
|
||||||
beginning of the next line, just as it does when -M is not
|
|
||||||
present. This means that it is possible for the second or
|
|
||||||
subsequent lines in a multiline match to be output again as
|
|
||||||
part of another match.
|
|
||||||
|
|
||||||
The newline sequence that separates multiple lines must be
|
|
||||||
matched as part of the pattern. For example, to find the
|
|
||||||
phrase "regular expression" in a file where "regular" might
|
|
||||||
be at the end of a line and "expression" at the start of the
|
|
||||||
next line, you could use this command:
|
next line, you could use this command:
|
||||||
|
|
||||||
pcre2grep -M 'regular\s+expression' <file>
|
pcre2grep -M 'regular\s+expression' <file>
|
||||||
|
|
||||||
The \s escape sequence matches any white space character,
|
The \s escape sequence matches any white space character,
|
||||||
including newlines, and is followed by + so as to match
|
including newlines, and is followed by + so as to match
|
||||||
trailing white space on the first line as well as possibly
|
trailing white space on the first line as well as possibly
|
||||||
handling a two-character newline sequence.
|
handling a two-character newline sequence.
|
||||||
|
|
||||||
There is a limit to the number of lines that can be matched,
|
There is a limit to the number of lines that can be matched,
|
||||||
imposed by the way that pcre2grep buffers the input file as
|
imposed by the way that pcre2grep buffers the input file as
|
||||||
it scans it. However, pcre2grep ensures that at least 8K
|
it scans it. With a sufficiently large processing buffer,
|
||||||
characters or the rest of the file (whichever is the shorter)
|
this should not be a problem, but the -M option does not work
|
||||||
are available for forward matching, and similarly the previ-
|
when input is read line by line (see --line-buffered).
|
||||||
ous 8K characters (or all the previous characters, if fewer
|
|
||||||
than 8K) are guaranteed to be available for lookbehind asser-
|
|
||||||
tions. The -M option does not work when input is read line by
|
|
||||||
line (see --line-buffered.)
|
|
||||||
|
|
||||||
-N newline-type, --newline=newline-type
|
-N newline-type, --newline=newline-type
|
||||||
The PCRE2 library supports five different conventions for
|
The PCRE2 library supports five different conventions for
|
||||||
indicating the ends of lines. They are the single-character
|
indicating the ends of lines. They are the single-character
|
||||||
sequences CR (carriage return) and LF (linefeed), the two-
|
sequences CR (carriage return) and LF (linefeed), the two-
|
||||||
character sequence CRLF, an "anycrlf" convention, which rec-
|
character sequence CRLF, an "anycrlf" convention, which rec-
|
||||||
ognizes any of the preceding three types, and an "any" con-
|
ognizes any of the preceding three types, and an "any" con-
|
||||||
vention, in which any Unicode line ending sequence is assumed
|
vention, in which any Unicode line ending sequence is assumed
|
||||||
to end a line. The Unicode sequences are the three just men-
|
to end a line. The Unicode sequences are the three just men-
|
||||||
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||||||
U+000C), NEL (next line, U+0085), LS (line separator,
|
U+000C), NEL (next line, U+0085), LS (line separator,
|
||||||
U+2028), and PS (paragraph separator, U+2029).
|
U+2028), and PS (paragraph separator, U+2029).
|
||||||
|
|
||||||
When the PCRE2 library is built, a default line-ending
|
When the PCRE2 library is built, a default line-ending
|
||||||
sequence is specified. This is normally the standard
|
sequence is specified. This is normally the standard
|
||||||
sequence for the operating system. Unless otherwise specified
|
sequence for the operating system. Unless otherwise specified
|
||||||
by this option, pcre2grep uses the library's default. The
|
by this option, pcre2grep uses the library's default. The
|
||||||
possible values for this option are CR, LF, CRLF, ANYCRLF, or
|
possible values for this option are CR, LF, CRLF, ANYCRLF, or
|
||||||
ANY. This makes it possible to use pcre2grep to scan files
|
ANY. This makes it possible to use pcre2grep to scan files
|
||||||
that have come from other environments without having to mod-
|
that have come from other environments without having to mod-
|
||||||
ify their line endings. If the data that is being scanned
|
ify their line endings. If the data that is being scanned
|
||||||
does not agree with the convention set by this option,
|
does not agree with the convention set by this option,
|
||||||
pcre2grep may behave in strange ways. Note that this option
|
pcre2grep may behave in strange ways. Note that this option
|
||||||
does not apply to files specified by the -f, --exclude-from,
|
does not apply to files specified by the -f, --exclude-from,
|
||||||
or --include-from options, which are expected to use the
|
or --include-from options, which are expected to use the
|
||||||
operating system's standard newline sequence.
|
operating system's standard newline sequence.
|
||||||
|
|
||||||
-n, --line-number
|
-n, --line-number
|
||||||
Precede each output line by its line number in the file, fol-
|
Precede each output line by its line number in the file, fol-
|
||||||
lowed by a colon for matching lines or a hyphen for context
|
lowed by a colon for matching lines or a hyphen for context
|
||||||
lines. If the file name is also being output, it precedes the
|
lines. If the file name is also being output, it precedes the
|
||||||
line number. When the -M option causes a pattern to match
|
line number. When the -M option causes a pattern to match
|
||||||
more than one line, only the first is preceded by its line
|
more than one line, only the first is preceded by its line
|
||||||
number. This option is forced if --line-offsets is used.
|
number. This option is forced if --line-offsets is used.
|
||||||
|
|
||||||
--no-jit If the PCRE2 library is built with support for just-in-time
|
--no-jit If the PCRE2 library is built with support for just-in-time
|
||||||
compiling (which speeds up matching), pcre2grep automatically
|
compiling (which speeds up matching), pcre2grep automatically
|
||||||
makes use of this, unless it was explicitly disabled at build
|
makes use of this, unless it was explicitly disabled at build
|
||||||
time. This option can be used to disable the use of JIT at
|
time. This option can be used to disable the use of JIT at
|
||||||
run time. It is provided for testing and working round prob-
|
run time. It is provided for testing and working round prob-
|
||||||
lems. It should never be needed in normal use.
|
lems. It should never be needed in normal use.
|
||||||
|
|
||||||
-o, --only-matching
|
-o, --only-matching
|
||||||
Show only the part of the line that matched a pattern instead
|
Show only the part of the line that matched a pattern instead
|
||||||
of the whole line. In this mode, no context is shown. That
|
of the whole line. In this mode, no context is shown. That
|
||||||
is, the -A, -B, and -C options are ignored. If there is more
|
is, the -A, -B, and -C options are ignored. If there is more
|
||||||
than one match in a line, each of them is shown separately.
|
than one match in a line, each of them is shown separately,
|
||||||
If -o is combined with -v (invert the sense of the match to
|
on a separate line of output. If -o is combined with -v
|
||||||
find non-matching lines), no output is generated, but the
|
(invert the sense of the match to find non-matching lines),
|
||||||
return code is set appropriately. If the matched portion of
|
no output is generated, but the return code is set appropri-
|
||||||
the line is empty, nothing is output unless the file name or
|
ately. If the matched portion of the line is empty, nothing
|
||||||
line number are being printed, in which case they are shown
|
is output unless the file name or line number is being
|
||||||
on an otherwise empty line. This option is mutually exclusive
|
printed, in which case they are shown on an otherwise empty
|
||||||
with --file-offsets and --line-offsets.
|
line. This option is mutually exclusive with --file-offsets
|
||||||
|
and --line-offsets.
|
||||||
|
|
||||||
-onumber, --only-matching=number
|
-onumber, --only-matching=number
|
||||||
Show only the part of the line that matched the capturing
|
Show only the part of the line that matched the capturing
|
||||||
|
@ -593,65 +608,80 @@ OPTIONS
|
||||||
put.
|
put.
|
||||||
|
|
||||||
If this option is given multiple times, multiple substrings
|
If this option is given multiple times, multiple substrings
|
||||||
are output, in the order the options are given. For example,
|
are output for each match, in the order the options are
|
||||||
-o3 -o1 -o3 causes the substrings matched by capturing paren-
|
given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||||
theses 3 and 1 and then 3 again to be output. By default,
|
the substrings matched by capturing parentheses 3 and 1 and
|
||||||
there is no separator (but see the next option).
|
then 3 again to be output. By default, there is no separator
|
||||||
|
(but see the next option).
|
||||||
|
|
||||||
--om-separator=text
|
--om-separator=text
|
||||||
Specify a separating string for multiple occurrences of -o.
|
Specify a separating string for multiple occurrences of -o.
|
||||||
The default is an empty string. Separating strings are never
|
The default is an empty string. Separating strings are never
|
||||||
coloured.
|
coloured.
|
||||||
|
|
||||||
-q, --quiet
|
-q, --quiet
|
||||||
Work quietly, that is, display nothing except error messages.
|
Work quietly, that is, display nothing except error messages.
|
||||||
The exit status indicates whether or not any matches were
|
The exit status indicates whether or not any matches were
|
||||||
found.
|
found.
|
||||||
|
|
||||||
-r, --recursive
|
-r, --recursive
|
||||||
If any given path is a directory, recursively scan the files
|
If any given path is a directory, recursively scan the files
|
||||||
it contains, taking note of any --include and --exclude set-
|
it contains, taking note of any --include and --exclude set-
|
||||||
tings. By default, a directory is read as a normal file; in
|
tings. By default, a directory is read as a normal file; in
|
||||||
some operating systems this gives an immediate end-of-file.
|
some operating systems this gives an immediate end-of-file.
|
||||||
This option is a shorthand for setting the -d option to
|
This option is a shorthand for setting the -d option to
|
||||||
"recurse".
|
"recurse".
|
||||||
|
|
||||||
--recursion-limit=number
|
--recursion-limit=number
|
||||||
See --match-limit above.
|
See --match-limit above.
|
||||||
|
|
||||||
-s, --no-messages
|
-s, --no-messages
|
||||||
Suppress error messages about non-existent or unreadable
|
Suppress error messages about non-existent or unreadable
|
||||||
files. Such files are quietly skipped. However, the return
|
files. Such files are quietly skipped. However, the return
|
||||||
code is still 2, even if matches were found in other files.
|
code is still 2, even if matches were found in other files.
|
||||||
|
|
||||||
|
-t, --total-count
|
||||||
|
This option is useful when scanning more than one file. If
|
||||||
|
used on its own, -t suppresses all output except for a grand
|
||||||
|
total number of matching lines (or non-matching lines if -v
|
||||||
|
is used) in all the files. If -t is used with -c, a grand
|
||||||
|
total is output except when the previous output is just one
|
||||||
|
line. In other words, it is not output when just one file's
|
||||||
|
count is listed. If file names are being output, the grand
|
||||||
|
total is preceded by "TOTAL:". Otherwise, it appears as just
|
||||||
|
another number. The -t option is ignored when used with -L
|
||||||
|
(list files without matches), because the grand total would
|
||||||
|
always be zero.
|
||||||
|
|
||||||
-u, --utf-8
|
-u, --utf-8
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE2
|
Operate in UTF-8 mode. This option is available only if PCRE2
|
||||||
has been compiled with UTF-8 support. All patterns (including
|
has been compiled with UTF-8 support. All patterns (including
|
||||||
those for any --exclude and --include options) and all sub-
|
those for any --exclude and --include options) and all sub-
|
||||||
ject lines that are scanned must be valid strings of UTF-8
|
ject lines that are scanned must be valid strings of UTF-8
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
-V, --version
|
-V, --version
|
||||||
Write the version numbers of pcre2grep and the PCRE2 library
|
Write the version numbers of pcre2grep and the PCRE2 library
|
||||||
to the standard output and then exit. Anything else on the
|
to the standard output and then exit. Anything else on the
|
||||||
command line is ignored.
|
command line is ignored.
|
||||||
|
|
||||||
-v, --invert-match
|
-v, --invert-match
|
||||||
Invert the sense of the match, so that lines which do not
|
Invert the sense of the match, so that lines which do not
|
||||||
match any of the patterns are the ones that are found.
|
match any of the patterns are the ones that are found.
|
||||||
|
|
||||||
-w, --word-regex, --word-regexp
|
-w, --word-regex, --word-regexp
|
||||||
Force the patterns to match only whole words. This is equiva-
|
Force the patterns to match only whole words. This is equiva-
|
||||||
lent to having \b at the start and end of the pattern. This
|
lent to having \b at the start and end of the pattern. This
|
||||||
option applies only to the patterns that are matched against
|
option applies only to the patterns that are matched against
|
||||||
the contents of files; it does not apply to patterns speci-
|
the contents of files; it does not apply to patterns speci-
|
||||||
fied by any of the --include or --exclude options.
|
fied by any of the --include or --exclude options.
|
||||||
|
|
||||||
-x, --line-regex, --line-regexp
|
-x, --line-regex, --line-regexp
|
||||||
Force the patterns to be anchored (each must start matching
|
Force the patterns to be anchored (each must start matching
|
||||||
at the beginning of a line) and in addition, require them to
|
at the beginning of a line) and in addition, require them to
|
||||||
match entire lines. This is equivalent to having ^ and $
|
match entire lines. In multiline mode the match may be more
|
||||||
characters at the start and end of each alternative top-level
|
than one line. This is equivalent to having \A and \Z charac-
|
||||||
|
ters at the start and end of each alternative top-level
|
||||||
branch in every pattern. This option applies only to the pat-
|
branch in every pattern. This option applies only to the pat-
|
||||||
terns that are matched against the contents of files; it does
|
terns that are matched against the contents of files; it does
|
||||||
not apply to patterns specified by any of the --include or
|
not apply to patterns specified by any of the --include or
|
||||||
|
@ -822,5 +852,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 19 June 2016
|
Last updated: 31 October 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
pcre2_code_free(pcre2_code *); \
|
pcre2_code_free(pcre2_code *); \
|
||||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
*pcre2_code_copy(const pcre2_code *);
|
*pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
|
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
/* Functions that give information about a compiled pattern. */
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
|
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
pcre2_code_free(pcre2_code *); \
|
pcre2_code_free(pcre2_code *); \
|
||||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
*pcre2_code_copy(const pcre2_code *);
|
*pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
|
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
/* Functions that give information about a compiled pattern. */
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
|
|
@ -1036,7 +1036,46 @@ if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||||
(*ref_count)++;
|
(*ref_count)++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return newcode;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Copy compiled code and character tables *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Compiled JIT code cannot be copied, so the new compiled block has no
|
||||||
|
associated JIT data. This version of code_copy also makes a separate copy of
|
||||||
|
the character tables. */
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN pcre2_code * PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_code_copy_with_tables(const pcre2_code *code)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE* ref_count;
|
||||||
|
pcre2_code *newcode;
|
||||||
|
uint8_t *newtables;
|
||||||
|
|
||||||
|
if (code == NULL) return NULL;
|
||||||
|
newcode = code->memctl.malloc(code->blocksize, code->memctl.memory_data);
|
||||||
|
if (newcode == NULL) return NULL;
|
||||||
|
memcpy(newcode, code, code->blocksize);
|
||||||
|
newcode->executable_jit = NULL;
|
||||||
|
|
||||||
|
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
||||||
|
code->memctl.memory_data);
|
||||||
|
if (newtables == NULL)
|
||||||
|
{
|
||||||
|
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
memcpy(newtables, code->tables, tables_length);
|
||||||
|
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
||||||
|
*ref_count = 1;
|
||||||
|
|
||||||
|
newcode->tables = newtables;
|
||||||
|
newcode->flags |= PCRE2_DEREF_TABLES;
|
||||||
return newcode;
|
return newcode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2367,7 +2406,7 @@ while (ptr < ptrend)
|
||||||
assertion, possibly preceded by a callout. If the value is 1, we have just
|
assertion, possibly preceded by a callout. If the value is 1, we have just
|
||||||
had the callout and expect an assertion. There must be at least 3 more
|
had the callout and expect an assertion. There must be at least 3 more
|
||||||
characters in all cases. We know that the current character is an opening
|
characters in all cases. We know that the current character is an opening
|
||||||
parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
|
parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
|
||||||
may be negative, since all callouts just decrement it. */
|
may be negative, since all callouts just decrement it. */
|
||||||
|
|
||||||
if (expect_cond_assert > 0)
|
if (expect_cond_assert > 0)
|
||||||
|
@ -2377,23 +2416,23 @@ while (ptr < ptrend)
|
||||||
{
|
{
|
||||||
case CHAR_C:
|
case CHAR_C:
|
||||||
ok = expect_cond_assert == 2;
|
ok = expect_cond_assert == 2;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CHAR_EQUALS_SIGN:
|
case CHAR_EQUALS_SIGN:
|
||||||
case CHAR_EXCLAMATION_MARK:
|
case CHAR_EXCLAMATION_MARK:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CHAR_LESS_THAN_SIGN:
|
case CHAR_LESS_THAN_SIGN:
|
||||||
ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
|
ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ok = FALSE;
|
ok = FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!ok)
|
if (!ok)
|
||||||
{
|
{
|
||||||
ptr--; /* Adjust error offset */
|
ptr--; /* Adjust error offset */
|
||||||
errorcode = ERR28;
|
errorcode = ERR28;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
@ -3559,7 +3598,7 @@ while (ptr < ptrend)
|
||||||
if (*ptr == CHAR_QUESTION_MARK)
|
if (*ptr == CHAR_QUESTION_MARK)
|
||||||
{
|
{
|
||||||
*parsed_pattern++ = META_COND_ASSERT;
|
*parsed_pattern++ = META_COND_ASSERT;
|
||||||
ptr--; /* Pull pointer back to the opening parenthesis. */
|
ptr--; /* Pull pointer back to the opening parenthesis. */
|
||||||
expect_cond_assert = 2;
|
expect_cond_assert = 2;
|
||||||
break; /* End of conditional */
|
break; /* End of conditional */
|
||||||
}
|
}
|
||||||
|
|
|
@ -427,15 +427,13 @@ so many of them that they are split into two fields. */
|
||||||
#define CTL_NULLCONTEXT 0x00200000u
|
#define CTL_NULLCONTEXT 0x00200000u
|
||||||
#define CTL_POSIX 0x00400000u
|
#define CTL_POSIX 0x00400000u
|
||||||
#define CTL_POSIX_NOSUB 0x00800000u
|
#define CTL_POSIX_NOSUB 0x00800000u
|
||||||
#define CTL_PUSH 0x01000000u
|
#define CTL_PUSH 0x01000000u /* These three must be */
|
||||||
#define CTL_PUSHCOPY 0x02000000u
|
#define CTL_PUSHCOPY 0x02000000u /* all in the same */
|
||||||
#define CTL_STARTCHAR 0x04000000u
|
#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */
|
||||||
#define CTL_USE_LENGTH 0x08000000u /* Same word as HEXPAT */
|
#define CTL_STARTCHAR 0x08000000u
|
||||||
#define CTL_UTF8_INPUT 0x10000000u
|
#define CTL_USE_LENGTH 0x10000000u /* Same word as HEXPAT */
|
||||||
#define CTL_ZERO_TERMINATE 0x20000000u
|
#define CTL_UTF8_INPUT 0x20000000u
|
||||||
|
#define CTL_ZERO_TERMINATE 0x40000000u
|
||||||
#define CTL_NL_SET 0x40000000u /* Informational */
|
|
||||||
#define CTL_BSR_SET 0x80000000u /* Informational */
|
|
||||||
|
|
||||||
/* Second control word */
|
/* Second control word */
|
||||||
|
|
||||||
|
@ -444,6 +442,9 @@ so many of them that they are split into two fields. */
|
||||||
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
|
#define CTL2_SUBSTITUTE_UNKNOWN_UNSET 0x00000004u
|
||||||
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
|
#define CTL2_SUBSTITUTE_UNSET_EMPTY 0x00000008u
|
||||||
|
|
||||||
|
#define CTL_NL_SET 0x40000000u /* Informational */
|
||||||
|
#define CTL_BSR_SET 0x80000000u /* Informational */
|
||||||
|
|
||||||
/* Combinations */
|
/* Combinations */
|
||||||
|
|
||||||
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
#define CTL_DEBUG (CTL_FULLBINCODE|CTL_INFO) /* For setting */
|
||||||
|
@ -607,7 +608,8 @@ static modstruct modlist[] = {
|
||||||
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
|
{ "posix_nosub", MOD_PAT, MOD_CTL, CTL_POSIX|CTL_POSIX_NOSUB, PO(control) },
|
||||||
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
{ "ps", MOD_DAT, MOD_OPT, PCRE2_PARTIAL_SOFT, DO(options) },
|
||||||
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
|
{ "push", MOD_PAT, MOD_CTL, CTL_PUSH, PO(control) },
|
||||||
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
|
{ "pushcopy", MOD_PAT, MOD_CTL, CTL_PUSHCOPY, PO(control) },
|
||||||
|
{ "pushtablescopy", MOD_PAT, MOD_CTL, CTL_PUSHTABLESCOPY, PO(control) },
|
||||||
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
|
{ "recursion_limit", MOD_CTM, MOD_INT, 0, MO(recursion_limit) },
|
||||||
{ "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
|
{ "regerror_buffsize", MOD_PAT, MOD_INT, 0, PO(regerror_buffsize) },
|
||||||
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
|
{ "replace", MOD_PND, MOD_STR, REPLACE_MODSIZE, PO(replacement) },
|
||||||
|
@ -651,10 +653,10 @@ static modstruct modlist[] = {
|
||||||
|
|
||||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
#define PUSH_SUPPORTED_COMPILE_CONTROLS ( \
|
||||||
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
CTL_BINCODE|CTL_CALLOUT_INFO|CTL_FULLBINCODE|CTL_HEXPAT|CTL_INFO| \
|
||||||
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_BSR_SET|CTL_NL_SET| \
|
CTL_JITVERIFY|CTL_MEMORY|CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY| \
|
||||||
CTL_USE_LENGTH)
|
CTL_USE_LENGTH)
|
||||||
|
|
||||||
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (0)
|
#define PUSH_SUPPORTED_COMPILE_CONTROLS2 (CTL_BSR_SET|CTL_NL_SET)
|
||||||
|
|
||||||
/* Controls that apply only at compile time with 'push'. */
|
/* Controls that apply only at compile time with 'push'. */
|
||||||
|
|
||||||
|
@ -664,7 +666,7 @@ static modstruct modlist[] = {
|
||||||
/* Controls that are forbidden with #pop or #popcopy. */
|
/* Controls that are forbidden with #pop or #popcopy. */
|
||||||
|
|
||||||
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
|
#define NOTPOP_CONTROLS (CTL_HEXPAT|CTL_POSIX|CTL_POSIX_NOSUB|CTL_PUSH| \
|
||||||
CTL_PUSHCOPY|CTL_USE_LENGTH)
|
CTL_PUSHCOPY|CTL_PUSHTABLESCOPY|CTL_USE_LENGTH)
|
||||||
|
|
||||||
/* Pattern controls that are mutually exclusive. At present these are all in
|
/* Pattern controls that are mutually exclusive. At present these are all in
|
||||||
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
|
the first control word. Note that CTL_POSIX_NOSUB is always accompanied by
|
||||||
|
@ -674,6 +676,7 @@ static uint32_t exclusive_pat_controls[] = {
|
||||||
CTL_POSIX | CTL_HEXPAT,
|
CTL_POSIX | CTL_HEXPAT,
|
||||||
CTL_POSIX | CTL_PUSH,
|
CTL_POSIX | CTL_PUSH,
|
||||||
CTL_POSIX | CTL_PUSHCOPY,
|
CTL_POSIX | CTL_PUSHCOPY,
|
||||||
|
CTL_POSIX | CTL_PUSHTABLESCOPY,
|
||||||
CTL_POSIX | CTL_USE_LENGTH,
|
CTL_POSIX | CTL_USE_LENGTH,
|
||||||
CTL_EXPAND | CTL_HEXPAT };
|
CTL_EXPAND | CTL_HEXPAT };
|
||||||
|
|
||||||
|
@ -973,6 +976,14 @@ are supported. */
|
||||||
else \
|
else \
|
||||||
a = (void *)pcre2_code_copy_32(G(b,32))
|
a = (void *)pcre2_code_copy_32(G(b,32))
|
||||||
|
|
||||||
|
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
|
||||||
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
a = (void *)pcre2_code_copy_with_tables_8(G(b,8)); \
|
||||||
|
else if (test_mode == PCRE16_MODE) \
|
||||||
|
a = (void *)pcre2_code_copy_with_tables_16(G(b,16)); \
|
||||||
|
else \
|
||||||
|
a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
|
||||||
|
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
|
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g); \
|
||||||
|
@ -1436,6 +1447,12 @@ the three different cases. */
|
||||||
else \
|
else \
|
||||||
a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
|
a = (void *)G(pcre2_code_copy_,BITTWO)(G(b,BITTWO))
|
||||||
|
|
||||||
|
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) \
|
||||||
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
|
a = (void *)G(pcre2_code_copy_with_tables_,BITONE)(G(b,BITONE)); \
|
||||||
|
else \
|
||||||
|
a = (void *)G(pcre2_code_copy_with_tables_,BITTWO)(G(b,BITTWO))
|
||||||
|
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
|
G(a,BITONE) = G(pcre2_compile_,BITONE)(G(b,BITONE),c,d,e,f,g); \
|
||||||
|
@ -1773,6 +1790,7 @@ the three different cases. */
|
||||||
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
|
(int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c)
|
||||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
|
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,8) = pcre2_code_copy_8(b)
|
||||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
|
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_8(G(b,8))
|
||||||
|
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_8(G(b,8))
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
|
G(a,8) = pcre2_compile_8(G(b,8),c,d,e,f,g)
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -1868,6 +1886,7 @@ the three different cases. */
|
||||||
(int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
|
(int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c)
|
||||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
|
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,16) = pcre2_code_copy_16(b)
|
||||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
|
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_16(G(b,16))
|
||||||
|
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_16(G(b,16))
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
|
G(a,16) = pcre2_compile_16(G(b,16),c,d,e,f,g)
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -1963,6 +1982,7 @@ the three different cases. */
|
||||||
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
|
(int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c)
|
||||||
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
|
#define PCRE2_CODE_COPY_FROM_VOID(a,b) G(a,32) = pcre2_code_copy_32(b)
|
||||||
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
|
#define PCRE2_CODE_COPY_TO_VOID(a,b) a = (void *)pcre2_code_copy_32(G(b,32))
|
||||||
|
#define PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(a,b) a = (void *)pcre2_code_copy_with_tables_32(G(b,32))
|
||||||
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
#define PCRE2_COMPILE(a,b,c,d,e,f,g) \
|
||||||
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
|
G(a,32) = pcre2_compile_32(G(b,32),c,d,e,f,g)
|
||||||
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
#define PCRE2_DFA_MATCH(a,b,c,d,e,f,g,h,i,j) \
|
||||||
|
@ -3435,8 +3455,8 @@ for (;;)
|
||||||
#else
|
#else
|
||||||
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
||||||
#endif
|
#endif
|
||||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_BSR_SET;
|
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_BSR_SET;
|
||||||
else dctl->control &= ~CTL_BSR_SET;
|
else dctl->control2 &= ~CTL_BSR_SET;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3445,8 +3465,8 @@ for (;;)
|
||||||
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
|
else if (len == 7 && strncmpic(pp, (const uint8_t *)"unicode", 7) == 0)
|
||||||
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
*((uint16_t *)field) = PCRE2_BSR_UNICODE;
|
||||||
else goto INVALID_VALUE;
|
else goto INVALID_VALUE;
|
||||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_BSR_SET;
|
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_BSR_SET;
|
||||||
else dctl->control |= CTL_BSR_SET;
|
else dctl->control2 |= CTL_BSR_SET;
|
||||||
}
|
}
|
||||||
pp = ep;
|
pp = ep;
|
||||||
break;
|
break;
|
||||||
|
@ -3513,14 +3533,14 @@ for (;;)
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
{
|
{
|
||||||
*((uint16_t *)field) = NEWLINE_DEFAULT;
|
*((uint16_t *)field) = NEWLINE_DEFAULT;
|
||||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control &= ~CTL_NL_SET;
|
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 &= ~CTL_NL_SET;
|
||||||
else dctl->control &= ~CTL_NL_SET;
|
else dctl->control2 &= ~CTL_NL_SET;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*((uint16_t *)field) = i;
|
*((uint16_t *)field) = i;
|
||||||
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control |= CTL_NL_SET;
|
if (ctx == CTX_PAT || ctx == CTX_DEFPAT) pctl->control2 |= CTL_NL_SET;
|
||||||
else dctl->control |= CTL_NL_SET;
|
else dctl->control2 |= CTL_NL_SET;
|
||||||
}
|
}
|
||||||
pp = ep;
|
pp = ep;
|
||||||
break;
|
break;
|
||||||
|
@ -3691,7 +3711,7 @@ Returns: nothing
|
||||||
static void
|
static void
|
||||||
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
show_controls(uint32_t controls, uint32_t controls2, const char *before)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||||
before,
|
before,
|
||||||
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "",
|
||||||
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
((controls & CTL_ALLAFTERTEXT) != 0)? " allaftertext" : "",
|
||||||
|
@ -3699,7 +3719,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
||||||
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
|
((controls & CTL_ALLUSEDTEXT) != 0)? " allusedtext" : "",
|
||||||
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
((controls & CTL_ALTGLOBAL) != 0)? " altglobal" : "",
|
||||||
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
((controls & CTL_BINCODE) != 0)? " bincode" : "",
|
||||||
((controls & CTL_BSR_SET) != 0)? " bsr" : "",
|
((controls2 & CTL_BSR_SET) != 0)? " bsr" : "",
|
||||||
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
((controls & CTL_CALLOUT_CAPTURE) != 0)? " callout_capture" : "",
|
||||||
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
|
((controls & CTL_CALLOUT_INFO) != 0)? " callout_info" : "",
|
||||||
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
((controls & CTL_CALLOUT_NONE) != 0)? " callout_none" : "",
|
||||||
|
@ -3715,12 +3735,13 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
|
||||||
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
((controls & CTL_JITVERIFY) != 0)? " jitverify" : "",
|
||||||
((controls & CTL_MARK) != 0)? " mark" : "",
|
((controls & CTL_MARK) != 0)? " mark" : "",
|
||||||
((controls & CTL_MEMORY) != 0)? " memory" : "",
|
((controls & CTL_MEMORY) != 0)? " memory" : "",
|
||||||
((controls & CTL_NL_SET) != 0)? " newline" : "",
|
((controls2 & CTL_NL_SET) != 0)? " newline" : "",
|
||||||
((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
|
((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "",
|
||||||
((controls & CTL_POSIX) != 0)? " posix" : "",
|
((controls & CTL_POSIX) != 0)? " posix" : "",
|
||||||
((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
|
((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "",
|
||||||
((controls & CTL_PUSH) != 0)? " push" : "",
|
((controls & CTL_PUSH) != 0)? " push" : "",
|
||||||
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
((controls & CTL_PUSHCOPY) != 0)? " pushcopy" : "",
|
||||||
|
((controls & CTL_PUSHTABLESCOPY) != 0)? " pushtablescopy" : "",
|
||||||
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
((controls & CTL_STARTCHAR) != 0)? " startchar" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
((controls2 & CTL2_SUBSTITUTE_EXTENDED) != 0)? " substitute_extended" : "",
|
||||||
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "",
|
||||||
|
@ -4061,7 +4082,7 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
|
|
||||||
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
|
||||||
|
|
||||||
if ((pat_patctl.control & CTL_BSR_SET) != 0 ||
|
if ((pat_patctl.control2 & CTL_BSR_SET) != 0 ||
|
||||||
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
|
(FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0)
|
||||||
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)?
|
||||||
"any Unicode newline" : "CR, LF, or CRLF");
|
"any Unicode newline" : "CR, LF, or CRLF");
|
||||||
|
@ -4930,7 +4951,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
/* Handle compiling via the native interface. Controls that act later are
|
/* Handle compiling via the native interface. Controls that act later are
|
||||||
ignored with "push". Replacements are locked out. */
|
ignored with "push". Replacements are locked out. */
|
||||||
|
|
||||||
if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY)) != 0)
|
if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
|
||||||
{
|
{
|
||||||
if (pat_patctl.replacement[0] != 0)
|
if (pat_patctl.replacement[0] != 0)
|
||||||
{
|
{
|
||||||
|
@ -5031,7 +5052,7 @@ if (test_mode == PCRE32_MODE && pbuffer32 != NULL)
|
||||||
appropriate default newline setting, local_newline_default will be non-zero. We
|
appropriate default newline setting, local_newline_default will be non-zero. We
|
||||||
use this if there is no explicit newline modifier. */
|
use this if there is no explicit newline modifier. */
|
||||||
|
|
||||||
if ((pat_patctl.control & CTL_NL_SET) == 0 && local_newline_default != 0)
|
if ((pat_patctl.control2 & CTL_NL_SET) == 0 && local_newline_default != 0)
|
||||||
{
|
{
|
||||||
SETFLD(pat_context, newline_convention, local_newline_default);
|
SETFLD(pat_context, newline_convention, local_newline_default);
|
||||||
}
|
}
|
||||||
|
@ -5163,7 +5184,7 @@ if (pattern_info(PCRE2_INFO_MAXLOOKBEHIND, &maxlookbehind, FALSE) != 0)
|
||||||
/* If an explicit newline modifier was given, set the information flag in the
|
/* If an explicit newline modifier was given, set the information flag in the
|
||||||
pattern so that it is preserved over push/pop. */
|
pattern so that it is preserved over push/pop. */
|
||||||
|
|
||||||
if ((pat_patctl.control & CTL_NL_SET) != 0)
|
if ((pat_patctl.control2 & CTL_NL_SET) != 0)
|
||||||
{
|
{
|
||||||
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
|
SETFLD(compiled_code, flags, FLD(compiled_code, flags) | PCRE2_NL_SET);
|
||||||
}
|
}
|
||||||
|
@ -5191,17 +5212,25 @@ if ((pat_patctl.control & CTL_PUSH) != 0)
|
||||||
SET(compiled_code, NULL);
|
SET(compiled_code, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The "pushcopy" control is similar, but pushes a copy of the pattern. This
|
/* The "pushcopy" and "pushtablescopy" controls are similar, but push a
|
||||||
tests the pcre2_code_copy() function. */
|
copy of the pattern, the latter with a copy of its character tables. This tests
|
||||||
|
the pcre2_code_copy() and pcre2_code_copy_with_tables() functions. */
|
||||||
|
|
||||||
if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
|
if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0)
|
||||||
{
|
{
|
||||||
if (patstacknext >= PATSTACKSIZE)
|
if (patstacknext >= PATSTACKSIZE)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
|
fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE);
|
||||||
return PR_ABEND;
|
return PR_ABEND;
|
||||||
}
|
}
|
||||||
PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
|
if ((pat_patctl.control & CTL_PUSHCOPY) != 0)
|
||||||
|
{
|
||||||
|
PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
|
||||||
|
compiled_code); }
|
||||||
}
|
}
|
||||||
|
|
||||||
return PR_OK;
|
return PR_OK;
|
||||||
|
|
|
@ -88,4 +88,13 @@
|
||||||
|
|
||||||
#pop should give an error
|
#pop should give an error
|
||||||
|
|
||||||
|
/abcd/pushtablescopy
|
||||||
|
abcd
|
||||||
|
|
||||||
|
#popcopy
|
||||||
|
abcd
|
||||||
|
|
||||||
|
#pop
|
||||||
|
abcd
|
||||||
|
|
||||||
# End of testinput20
|
# End of testinput20
|
||||||
|
|
|
@ -135,4 +135,16 @@ Serialization failed: error -30: patterns do not all use the same character tabl
|
||||||
#pop should give an error
|
#pop should give an error
|
||||||
** Can't pop off an empty stack
|
** Can't pop off an empty stack
|
||||||
|
|
||||||
|
/abcd/pushtablescopy
|
||||||
|
abcd
|
||||||
|
0: abcd
|
||||||
|
|
||||||
|
#popcopy
|
||||||
|
abcd
|
||||||
|
0: abcd
|
||||||
|
|
||||||
|
#pop
|
||||||
|
abcd
|
||||||
|
0: abcd
|
||||||
|
|
||||||
# End of testinput20
|
# End of testinput20
|
||||||
|
|
Loading…
Reference in New Issue