Final preparations for 10.00-RC1
This commit is contained in:
parent
91f2e97474
commit
0acc416ed1
19
ChangeLog
19
ChangeLog
|
@ -1,12 +1,12 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.0 xx-xxxx-2014
|
||||
-------------------------
|
||||
Version 10.00 24-November-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
item 20 for release 8.36.
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36.
|
||||
|
||||
The code of the library was heavily revised as part of the new API
|
||||
implementation. Details of each and every modification were not individually
|
||||
|
@ -25,7 +25,7 @@ matched by that pattern.
|
|||
|
||||
4. For the benefit of those who use PCRE2 via some other application, that is,
|
||||
not writing the function calls themselves, it is possible to check the PCRE2
|
||||
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
|
||||
version by matching a pattern such as /(?(VERSION>=10)yes|no)/ against a
|
||||
string such as "yesno".
|
||||
|
||||
5. There are case-equivalent Unicode characters whose encodings use different
|
||||
|
@ -46,14 +46,15 @@ characters, for example: /(?:(?=.)|(?<!x))a/.
|
|||
|
||||
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
|
||||
those parentheses to be closed with whatever has been captured so far. However,
|
||||
it was failing to mark any other groups between the hightest capture so far and
|
||||
it was failing to mark any other groups between the highest capture so far and
|
||||
the current group as "unset". Thus, the ovector for those groups contained
|
||||
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
|
||||
matched against "abcd".
|
||||
|
||||
8. The pcre2_substitute() function has been implemented.
|
||||
|
||||
9. If an assertion condition was quantified with a minimum of zero (an odd
|
||||
thing to do, but it happened), SIGSEGV or other misbehaviour could occur.
|
||||
9. If an assertion used as a condition was quantified with a minimum of zero
|
||||
(an odd thing to do, but it happened), SIGSEGV or other misbehaviour could
|
||||
occur.
|
||||
|
||||
****
|
||||
|
|
50
Makefile.am
50
Makefile.am
|
@ -375,28 +375,34 @@ CLEANFILES += src/pcre2_chartables.c
|
|||
# when pcre2_jit_compile.c is processed, so they must be distributed.
|
||||
|
||||
EXTRA_DIST += \
|
||||
sljit/sljitConfig.h \
|
||||
sljit/sljitConfigInternal.h \
|
||||
sljit/sljitExecAllocator.c \
|
||||
sljit/sljitLir.c \
|
||||
sljit/sljitLir.h \
|
||||
sljit/sljitNativeARM_32.c \
|
||||
sljit/sljitNativeARM_64.c \
|
||||
sljit/sljitNativeARM_T2_32.c \
|
||||
sljit/sljitNativeMIPS_32.c \
|
||||
sljit/sljitNativeMIPS_64.c \
|
||||
sljit/sljitNativeMIPS_common.c \
|
||||
sljit/sljitNativePPC_32.c \
|
||||
sljit/sljitNativePPC_64.c \
|
||||
sljit/sljitNativePPC_common.c \
|
||||
sljit/sljitNativeSPARC_32.c \
|
||||
sljit/sljitNativeSPARC_common.c \
|
||||
sljit/sljitNativeTILEGX-encoder.c \
|
||||
sljit/sljitNativeTILEGX_64.c \
|
||||
sljit/sljitNativeX86_32.c \
|
||||
sljit/sljitNativeX86_64.c \
|
||||
sljit/sljitNativeX86_common.c \
|
||||
sljit/sljitUtils.c
|
||||
src/sljit/sljitConfig.h \
|
||||
src/sljit/sljitConfigInternal.h \
|
||||
src/sljit/sljitExecAllocator.c \
|
||||
src/sljit/sljitLir.c \
|
||||
src/sljit/sljitLir.h \
|
||||
src/sljit/sljitNativeARM_32.c \
|
||||
src/sljit/sljitNativeARM_64.c \
|
||||
src/sljit/sljitNativeARM_T2_32.c \
|
||||
src/sljit/sljitNativeMIPS_32.c \
|
||||
src/sljit/sljitNativeMIPS_64.c \
|
||||
src/sljit/sljitNativeMIPS_common.c \
|
||||
src/sljit/sljitNativePPC_32.c \
|
||||
src/sljit/sljitNativePPC_64.c \
|
||||
src/sljit/sljitNativePPC_common.c \
|
||||
src/sljit/sljitNativeSPARC_32.c \
|
||||
src/sljit/sljitNativeSPARC_common.c \
|
||||
src/sljit/sljitNativeTILEGX-encoder.c \
|
||||
src/sljit/sljitNativeTILEGX_64.c \
|
||||
src/sljit/sljitNativeX86_32.c \
|
||||
src/sljit/sljitNativeX86_64.c \
|
||||
src/sljit/sljitNativeX86_common.c \
|
||||
src/sljit/sljitUtils.c
|
||||
|
||||
# Some of the JIT sources are also in separate files that are #included.
|
||||
|
||||
EXTRA_DIST += \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c
|
||||
|
||||
if WITH_PCRE2_8
|
||||
libpcre2_8_la_LDFLAGS = $(EXTRA_LIBPCRE2_8_LDFLAGS)
|
||||
|
|
12
NEWS
12
NEWS
|
@ -1,11 +1,13 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.0 xx-xxxx-2014
|
||||
-------------------------
|
||||
Version 10.00 24-November-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
item 20 for release 8.36.
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
library. Changes prior to 10.00 are logged in the ChangeLog file for the old
|
||||
API, up to item 20 for release 8.36. New programs are recommended to use the
|
||||
new library. Programs that use the original (PCRE1) API will need changing
|
||||
before linking with the new library.
|
||||
|
||||
****
|
||||
|
|
72
README
72
README
|
@ -5,11 +5,9 @@ PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
|||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
|
@ -46,7 +44,7 @@ there are as yet no C++ wrappers.
|
|||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These end up in the library called libpcre2posix. Note that this
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
@ -72,7 +70,7 @@ new names.
|
|||
|
||||
|
||||
Documentation for PCRE2
|
||||
----------------------
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
|
@ -95,7 +93,7 @@ PCRE2 documentation is supplied in two other forms:
|
|||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
--------------------------------------
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
|
@ -112,7 +110,7 @@ library, because it uses only Standard C functions.
|
|||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
-------------------------------------
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
|
@ -120,7 +118,7 @@ file for ways of building PCRE2 without using autotools.
|
|||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
-----------------------------
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
@ -166,15 +164,15 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also built.
|
||||
If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 to disable
|
||||
building the 8-bit library.
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error. FIXME: NOT YET IMPLEMENTED.
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
@ -196,13 +194,13 @@ library. They are also documented in the pcre2build man page.
|
|||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF or any
|
||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE2 can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
|
||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
|
@ -251,8 +249,9 @@ library. They are also documented in the pcre2build man page.
|
|||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance. In the 32-bit
|
||||
library, the link size setting is ignored, as 4-byte offsets are always used.
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
|
@ -376,12 +375,13 @@ contains compiler output from tests that "configure" runs.
|
|||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. FIXME: still to be
|
||||
implemented. If the 8-bit library is built, libpcre2-posix and the pcre2grep
|
||||
command are also built.
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document.
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
|
@ -528,7 +528,7 @@ Testing PCRE2
|
|||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the options of the
|
||||
pcre2grep command. When JIT support is enabled, another test program called
|
||||
pcre2grep command. When JIT support is enabled, a third test program called
|
||||
pcre2_jit_test is built. Both the scripts and all the program tests are run if
|
||||
you obey "make check". For other environments, see the instructions in
|
||||
NON-AUTOTOOLS-BUILD.
|
||||
|
@ -709,7 +709,6 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_exec.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
|
@ -721,6 +720,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
|
@ -736,13 +736,15 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* 16 files that make up the JIT compiler FIXME
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_printint.c part of pcre2test
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
|
@ -790,7 +792,6 @@ The distribution should contain the files listed below.
|
|||
mkinstalldirs script for making install directories
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
pcre2_jit_test.c test program for the JIT compiler
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
|
@ -805,25 +806,14 @@ The distribution should contain the files listed below.
|
|||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for VPASCAL FIXME FIXME
|
||||
|
||||
makevp.bat
|
||||
makevp_c.txt
|
||||
makevp_l.txt
|
||||
pcre2gexp.pas
|
||||
|
||||
(F) Auxiliary files for building PCRE2 "by hand"
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(F) Miscellaneous
|
||||
|
||||
RunTest.bat a script for running tests under Windows FIXME
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 03 November 2014
|
||||
Last updated: 24 November 2014
|
||||
|
|
|
@ -10,8 +10,8 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
|
||||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [00])
|
||||
m4_define(pcre2_prerelease, [-DEV])
|
||||
m4_define(pcre2_date, [2014-99-99])
|
||||
m4_define(pcre2_prerelease, [-RC1])
|
||||
m4_define(pcre2_date, [2014-11-24])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -5,11 +5,9 @@ PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
|||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
||||
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.gz
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
|
@ -46,7 +44,7 @@ there are as yet no C++ wrappers.
|
|||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
library that are based on the POSIX regular expression API (see the pcre2posix
|
||||
man page). These end up in the library called libpcre2posix. Note that this
|
||||
man page). These can be found in a library called libpcre2posix. Note that this
|
||||
just provides a POSIX calling interface to PCRE2; the regular expressions
|
||||
themselves still follow Perl syntax and semantics. The POSIX API is restricted,
|
||||
and does not give full access to all of PCRE2's facilities.
|
||||
|
@ -72,7 +70,7 @@ new names.
|
|||
|
||||
|
||||
Documentation for PCRE2
|
||||
----------------------
|
||||
-----------------------
|
||||
|
||||
If you install PCRE2 in the normal way on a Unix-like system, you will end up
|
||||
with a set of man pages whose names all start with "pcre2". The one that is
|
||||
|
@ -95,7 +93,7 @@ PCRE2 documentation is supplied in two other forms:
|
|||
|
||||
|
||||
Building PCRE2 on non-Unix-like systems
|
||||
--------------------------------------
|
||||
---------------------------------------
|
||||
|
||||
For a non-Unix-like system, please read the comments in the file
|
||||
NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||
|
@ -112,7 +110,7 @@ library, because it uses only Standard C functions.
|
|||
|
||||
|
||||
Building PCRE2 without using autotools
|
||||
-------------------------------------
|
||||
--------------------------------------
|
||||
|
||||
The use of autotools (in particular, libtool) is problematic in some
|
||||
environments, even some that are Unix or Unix-like. See the NON-AUTOTOOLS-BUILD
|
||||
|
@ -120,7 +118,7 @@ file for ways of building PCRE2 without using autotools.
|
|||
|
||||
|
||||
Building PCRE2 using autotools
|
||||
-----------------------------
|
||||
------------------------------
|
||||
|
||||
The following instructions assume the use of the widely used "configure; make;
|
||||
make install" (autotools) process.
|
||||
|
@ -166,15 +164,15 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
. By default, only the 8-bit library is built. If you add --enable-pcre2-16 to
|
||||
the "configure" command, the 16-bit library is also built. If you add
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also built.
|
||||
If you want only the 16-bit or 32-bit library, use --disable-pcre2-8 to disable
|
||||
building the 8-bit library.
|
||||
--enable-pcre2-32 to the "configure" command, the 32-bit library is also
|
||||
built. If you want only the 16-bit or 32-bit library, use --disable-pcre2-8
|
||||
to disable building the 8-bit library.
|
||||
|
||||
. If you want to include support for just-in-time compiling, which can give
|
||||
large performance improvements on certain platforms, add --enable-jit to the
|
||||
"configure" command. This support is available only for certain hardware
|
||||
architectures. If you try to enable it on an unsupported architecture, there
|
||||
will be a compile time error. FIXME: NOT YET IMPLEMENTED.
|
||||
will be a compile time error.
|
||||
|
||||
. When JIT support is enabled, pcre2grep automatically makes use of it, unless
|
||||
you add --disable-pcre2grep-jit to the "configure" command.
|
||||
|
@ -196,13 +194,13 @@ library. They are also documented in the pcre2build man page.
|
|||
\P, \p, and \X sequences that recognize Unicode character properties.
|
||||
However, only the basic two-letter properties such as Lu are supported.
|
||||
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF or any
|
||||
of the preceding, or any of the Unicode newline sequences as indicating the
|
||||
. You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any
|
||||
of the preceding, or any of the Unicode newline sequences, as indicating the
|
||||
end of a line. Whatever you specify at build time is the default; the caller
|
||||
of PCRE2 can change the selection at run time. The default newline indicator
|
||||
is a single LF character (the Unix standard). You can specify the default
|
||||
newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
|
||||
or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
|
||||
newline indicator by adding --enable-newline-is-cr, --enable-newline-is-lf,
|
||||
--enable-newline-is-crlf, --enable-newline-is-anycrlf, or
|
||||
--enable-newline-is-any to the "configure" command, respectively.
|
||||
|
||||
If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
|
||||
|
@ -251,8 +249,9 @@ library. They are also documented in the pcre2build man page.
|
|||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
the same as --with-link-size=4, which (in both libraries) uses four-byte
|
||||
offsets. Increasing the internal link size reduces performance. In the 32-bit
|
||||
library, the link size setting is ignored, as 4-byte offsets are always used.
|
||||
offsets. Increasing the internal link size reduces performance in the 8-bit
|
||||
and 16-bit libraries. In the 32-bit library, the link size setting is
|
||||
ignored, as 4-byte offsets are always used.
|
||||
|
||||
. You can build PCRE2 so that its internal match() function that is called from
|
||||
pcre2_match() does not call itself recursively. Instead, it uses memory
|
||||
|
@ -376,12 +375,13 @@ contains compiler output from tests that "configure" runs.
|
|||
Once "configure" has run, you can run "make". This builds whichever of the
|
||||
libraries libpcre2-8, libpcre2-16 and libpcre2-32 are configured, and a test
|
||||
program called pcre2test. If you enabled JIT support with --enable-jit, another
|
||||
test program called pcre2_jit_test is built as well. FIXME: still to be
|
||||
implemented. If the 8-bit library is built, libpcre2-posix and the pcre2grep
|
||||
command are also built.
|
||||
test program called pcre2_jit_test is built as well. If the 8-bit library is
|
||||
built, libpcre2-posix and the pcre2grep command are also built. Running
|
||||
"make" with the -j option may speed up compilation on multiprocessor systems.
|
||||
|
||||
The command "make check" runs all the appropriate tests. Details of the PCRE2
|
||||
tests are given below in a separate section of this document.
|
||||
tests are given below in a separate section of this document. The -j option of
|
||||
"make" can also be used when running the tests.
|
||||
|
||||
You can use "make install" to install PCRE2 into live directories on your
|
||||
system. The following are installed (file names are all relative to the
|
||||
|
@ -528,7 +528,7 @@ Testing PCRE2
|
|||
|
||||
To test the basic PCRE2 library on a Unix-like system, run the RunTest script.
|
||||
There is another script called RunGrepTest that tests the options of the
|
||||
pcre2grep command. When JIT support is enabled, another test program called
|
||||
pcre2grep command. When JIT support is enabled, a third test program called
|
||||
pcre2_jit_test is built. Both the scripts and all the program tests are run if
|
||||
you obey "make check". For other environments, see the instructions in
|
||||
NON-AUTOTOOLS-BUILD.
|
||||
|
@ -709,7 +709,6 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_exec.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
|
@ -721,6 +720,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substitute.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
|
@ -736,13 +736,15 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_intmodedep.h a mode-specific internal header
|
||||
src/pcre2_ucp.h header for Unicode property handling
|
||||
|
||||
sljit/* 16 files that make up the JIT compiler FIXME
|
||||
sljit/* source files for the JIT compiler
|
||||
|
||||
(B) Source files for programs that use PCRE2:
|
||||
|
||||
src/pcre2demo.c simple demonstration of coding calls to PCRE2
|
||||
src/pcre2grep.c source of a grep utility that uses PCRE2
|
||||
src/pcre2test.c comprehensive test program
|
||||
src/pcre2_printint.c part of pcre2test
|
||||
src/pcre2_jit_test.c JIT test program
|
||||
|
||||
(C) Auxiliary files:
|
||||
|
||||
|
@ -790,7 +792,6 @@ The distribution should contain the files listed below.
|
|||
mkinstalldirs script for making install directories
|
||||
perltest.sh Script for running a Perl test program
|
||||
pcre2-config.in source of script which retains PCRE2 information
|
||||
pcre2_jit_test.c test program for the JIT compiler
|
||||
testdata/testinput* test data for main library tests
|
||||
testdata/testoutput* expected test results
|
||||
testdata/grep* input and output for pcre2grep tests
|
||||
|
@ -805,25 +806,14 @@ The distribution should contain the files listed below.
|
|||
CMakeLists.txt
|
||||
config-cmake.h.in
|
||||
|
||||
(E) Auxiliary files for VPASCAL FIXME FIXME
|
||||
|
||||
makevp.bat
|
||||
makevp_c.txt
|
||||
makevp_l.txt
|
||||
pcre2gexp.pas
|
||||
|
||||
(F) Auxiliary files for building PCRE2 "by hand"
|
||||
(E) Auxiliary files for building PCRE2 "by hand"
|
||||
|
||||
pcre2.h.generic ) a version of the public PCRE2 header file
|
||||
) for use in non-"configure" environments
|
||||
config.h.generic ) a version of config.h for use in non-"configure"
|
||||
) environments
|
||||
|
||||
(F) Miscellaneous
|
||||
|
||||
RunTest.bat a script for running tests under Windows FIXME
|
||||
|
||||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 03 November 2014
|
||||
Last updated: 24 November 2014
|
||||
|
|
|
@ -2074,6 +2074,12 @@ returned by <b>pcre2_get_startchar()</b>. For a non-partial match, this can be
|
|||
different to the value of <i>ovector[0]</i> if the pattern contains the \K
|
||||
escape sequence. After a partial match, however, this value is always the same
|
||||
as <i>ovector[0]</i> because \K does not affect the result of a partial match.
|
||||
</P>
|
||||
<P>
|
||||
The <b>startchar</b> field is also used to return the offset of an invalid
|
||||
UTF character when UTF checking fails. Details are given in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
<a name="errorlist"></a></P>
|
||||
<br><a name="SEC26" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<P>
|
||||
|
@ -2658,7 +2664,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC36" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 21 November 2014
|
||||
Last updated: 23 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -197,9 +197,9 @@ the string "dog" matched against the ungreedy pattern shown above:
|
|||
<pre>
|
||||
/dog(sbody)??/
|
||||
</pre>
|
||||
Whereas the standard functions stop as soon as they find the complete match for
|
||||
"dog", the DFA functions also find the partial match for "dogsbody", and so
|
||||
return that when PCRE2_PARTIAL_HARD is set.
|
||||
Whereas the standard function stops as soon as it finds the complete match for
|
||||
"dog", the DFA function also finds the partial match for "dogsbody", and so
|
||||
returns that when PCRE2_PARTIAL_HARD is set.
|
||||
</P>
|
||||
<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
|
||||
<P>
|
||||
|
|
|
@ -244,7 +244,7 @@ input lines. Each set starts with a regular expression pattern, followed by any
|
|||
number of subject lines to be matched against that pattern. In between sets of
|
||||
test data, command lines that begin with a hash (#) character may appear. This
|
||||
file format, with some restrictions, can also be processed by the
|
||||
<b>perltest.pl</b> script that is distributed with PCRE2 as a means of checking
|
||||
<b>perltest.sh</b> script that is distributed with PCRE2 as a means of checking
|
||||
that the behaviour of PCRE2 and Perl is the same.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -287,11 +287,11 @@ patterns. Modifiers on a pattern can change these settings.
|
|||
#perltest
|
||||
</pre>
|
||||
The appearance of this line causes all subsequent modifier settings to be
|
||||
checked for compatibility with the <b>perltest.pl</b> script, which is used to
|
||||
checked for compatibility with the <b>perltest.sh</b> script, which is used to
|
||||
confirm that Perl gives the same results as PCRE2. Also, apart from comment
|
||||
lines, none of the other command lines are permitted, because they and many
|
||||
of the modifiers are specific to <b>pcre2test</b>, and should not be used in
|
||||
test files that are also processed by <b>perltest.pl</b>. The \fP#perltest\fB
|
||||
test files that are also processed by <b>perltest.sh</b>. The <b>#perltest</b>
|
||||
command helps detect tests that are accidentally put in the wrong file.
|
||||
<pre>
|
||||
#subject <modifier-list>
|
||||
|
@ -307,7 +307,7 @@ for both patterns and subject lines, whereas others are valid for one or the
|
|||
other only. Each modifier has a long name, for example "anchored", and some of
|
||||
them must be followed by an equals sign and a value, for example, "offset=12".
|
||||
Modifiers that do not take values may be preceded by a minus sign to turn off a
|
||||
previous default setting.
|
||||
previous setting.
|
||||
</P>
|
||||
<P>
|
||||
A few of the more common modifiers can also be specified as single letters, for
|
||||
|
@ -376,7 +376,7 @@ encoding non-printing characters in a visible way:
|
|||
\xhh hexadecimal byte (up to 2 hex digits)
|
||||
\x{hh...} hexadecimal character (any number of hex digits)
|
||||
</pre>
|
||||
The use of \x{hh...} is not dependent on the use of the utf modifier on
|
||||
The use of \x{hh...} is not dependent on the use of the <b>utf</b> modifier on
|
||||
the pattern. It is recognized always. There may be any number of hexadecimal
|
||||
digits inside the braces; invalid values provoke error messages.
|
||||
</P>
|
||||
|
@ -411,7 +411,7 @@ is converted to "abcabcabcabc". This feature does not support nesting. To
|
|||
include a closing square bracket in the characters, code it as \x5D.
|
||||
</P>
|
||||
<P>
|
||||
A backslash followed by an equals sign marke the end of the subject string and
|
||||
A backslash followed by an equals sign marks the end of the subject string and
|
||||
the start of a modifier list. For example:
|
||||
<pre>
|
||||
abc\=notbol,notempty
|
||||
|
@ -503,8 +503,8 @@ is built, with the default default being Unicode.
|
|||
</P>
|
||||
<P>
|
||||
The <b>newline</b> modifier specifies which characters are to be interpreted as
|
||||
newlines, both in the pattern and (by default) in subject lines. The type must
|
||||
be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
newlines, both in the pattern and in subject lines. The type must be one of CR,
|
||||
LF, CRLF, ANYCRLF, or ANY (in upper or lower case).
|
||||
</P>
|
||||
<br><b>
|
||||
Information about a pattern
|
||||
|
@ -522,8 +522,8 @@ regression tests can be used in different environments.
|
|||
</P>
|
||||
<P>
|
||||
The <b>fullbincode</b> modifier, by contrast, <i>does</i> include length and
|
||||
offset values. This is used in a few special tests and is also useful for
|
||||
one-off tests.
|
||||
offset values. This is used in a few special tests that run only for specific
|
||||
code unit widths and link sizes, and is also useful for one-off tests.
|
||||
</P>
|
||||
<P>
|
||||
The <b>info</b> modifier requests information about the compiled pattern
|
||||
|
@ -546,13 +546,14 @@ some typical examples:
|
|||
Last code unit = 'c' (caseless)
|
||||
Subject length lower bound = 3
|
||||
</pre>
|
||||
"Compile options" are those specified to the compile function; "overall
|
||||
options" have added options that are taken or deduced from the pattern. If both
|
||||
sets of options are the same, just a single "options" line is output. "First
|
||||
code unit" is where any match must start; if there is more than one they are
|
||||
listed as "starting code units". "Last code unit" is the last literal code unit
|
||||
that must be present in any match. This is not necessarily the last character.
|
||||
These lines are omitted if no starting or ending code units are recorded.
|
||||
"Compile options" are those specified by modifiers; "overall options" have
|
||||
added options that are taken or deduced from the pattern. If both sets of
|
||||
options are the same, just a single "options" line is output; if there are no
|
||||
options, the line is omitted. "First code unit" is where any match must start;
|
||||
if there is more than one they are listed as "starting code units". "Last code
|
||||
unit" is the last literal code unit that must be present in any match. This is
|
||||
not necessarily the last character. These lines are omitted if no starting or
|
||||
ending code units are recorded.
|
||||
</P>
|
||||
<br><b>
|
||||
Specifying a pattern in hex
|
||||
|
@ -565,16 +566,16 @@ pairs. For example:
|
|||
/ab 32 59/hex
|
||||
</pre>
|
||||
This feature is provided as a way of creating patterns that contain binary zero
|
||||
characters. By default, <b>pcre2test</b> passes patterns as zero-terminated
|
||||
strings to <b>pcre2_compile()</b>, giving the length as PCRE2_ZERO_TERMINATED.
|
||||
However, for patterns specified in hexadecimal, the actual length of the
|
||||
pattern is passed.
|
||||
and other non-printing characters. By default, <b>pcre2test</b> passes patterns
|
||||
as zero-terminated strings to <b>pcre2_compile()</b>, giving the length as
|
||||
PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the
|
||||
actual length of the pattern is passed.
|
||||
</P>
|
||||
<br><b>
|
||||
JIT compilation
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>/jit</b> modifier may optionally be followed by and equals sign and a
|
||||
The <b>/jit</b> modifier may optionally be followed by an equals sign and a
|
||||
number in the range 0 to 7:
|
||||
<pre>
|
||||
0 disable JIT
|
||||
|
@ -606,7 +607,7 @@ pattern shows whether JIT compilation was or was not successful. If
|
|||
<b>jitverify</b> is specified without <b>jit</b>, jit=7 is assumed. If JIT
|
||||
compilation is successful when <b>jitverify</b> is set, the text "(JIT)" is
|
||||
added to the first output line after a match or non match when JIT-compiled
|
||||
code was actually used.
|
||||
code was actually used in the match.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting a locale
|
||||
|
@ -689,8 +690,8 @@ be aborted.
|
|||
Using alternative character tables
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>/tables</b> modifier must be followed by a single digit. It causes a
|
||||
specific set of built-in character tables to be passed to
|
||||
The value specified for the <b>/tables</b> modifier must be one of the digits 0,
|
||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
||||
different character tables. The digit specifies the tables as follows:
|
||||
<pre>
|
||||
|
@ -800,13 +801,13 @@ The effects of these modifiers are described in the following sections.
|
|||
Showing more text
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>aftertext</b> modifier requests that as well as outputting the substring
|
||||
that matched the entire pattern, <b>pcre2test</b> should in addition output the
|
||||
remainder of the subject string. This is useful for tests where the subject
|
||||
contains multiple copies of the same substring. The <b>allaftertext</b> modifier
|
||||
requests the same action for captured substrings as well as the main matched
|
||||
substring. In each case the remainder is output on the following line with a
|
||||
plus character following the capture number.
|
||||
The <b>aftertext</b> modifier requests that as well as outputting the part of
|
||||
the subject string that matched the entire pattern, <b>pcre2test</b> should in
|
||||
addition output the remainder of the subject string. This is useful for tests
|
||||
where the subject contains multiple copies of the same substring. The
|
||||
<b>allaftertext</b> modifier requests the same action for captured substrings as
|
||||
well as the main matched substring. In each case the remainder is output on the
|
||||
following line with a plus character following the capture number.
|
||||
</P>
|
||||
<P>
|
||||
The <b>allusedtext</b> modifier requests that all the text that was consulted
|
||||
|
@ -824,7 +825,8 @@ underneath them. Here is an example:
|
|||
<<< >>>
|
||||
</pre>
|
||||
This shows that the matched string is "abc", with the preceding and following
|
||||
strings "pqr" and "xyz" also consulted during the match.
|
||||
strings "pqr" and "xyz" having been consulted during the match (when processing
|
||||
the assertions).
|
||||
</P>
|
||||
<P>
|
||||
The <b>startchar</b> modifier requests that the starting character for the match
|
||||
|
@ -881,7 +883,7 @@ function is called again to search the remainder of the subject. The difference
|
|||
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
||||
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
||||
to start searching at a new point within the entire string (which is what Perl
|
||||
does), whereas the latter passes over a shortened substring. This makes a
|
||||
does), whereas the latter passes over a shortened subject. This makes a
|
||||
difference to the matching process if the pattern begins with a lookbehind
|
||||
assertion (including \b or \B).
|
||||
</P>
|
||||
|
@ -893,7 +895,7 @@ fails, the start offset is advanced, and the normal match is retried. This
|
|||
imitates the way Perl handles such cases when using the <b>/g</b> modifier or
|
||||
the <b>split()</b> function. Normally, the start offset is advanced by one
|
||||
character, but if the newline convention recognizes CRLF as a newline, and the
|
||||
current character is CR followed by LF, an advance of two is used.
|
||||
current character is CR followed by LF, an advance of two characters occurs.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing substring extraction functions
|
||||
|
@ -906,9 +908,9 @@ for example:
|
|||
<pre>
|
||||
abcd\=copy=1,copy=3,get=G1
|
||||
</pre>
|
||||
If the <b>#subject</b> command is used to set default copy and get lists, these
|
||||
can be unset by specifying a negative number for numbered groups and an empty
|
||||
name for named groups.
|
||||
If the <b>#subject</b> command is used to set default copy and/or get lists,
|
||||
these can be unset by specifying a negative number to cancel all numbered
|
||||
groups and an empty name to cancel all named groups.
|
||||
</P>
|
||||
<P>
|
||||
The <b>getall</b> modifier tests <b>pcre2_substring_list_get()</b>, which
|
||||
|
@ -919,7 +921,8 @@ If the subject line is successfully matched, the substrings extracted by the
|
|||
convenience functions are output with C, G, or L after the string number
|
||||
instead of a colon. This is in addition to the normal full list. The string
|
||||
length (that is, the return from the extraction function) is given in
|
||||
parentheses after each substring.
|
||||
parentheses after each substring, followed by the name when the extraction was
|
||||
by name.
|
||||
</P>
|
||||
<br><b>
|
||||
Testing the substitution function
|
||||
|
@ -1093,11 +1096,10 @@ characters before the actual match start if a lookbehind assertion, \K, \b,
|
|||
or \B was involved.)
|
||||
</P>
|
||||
<P>
|
||||
For any other return, <b>pcre2test</b> outputs the PCRE2
|
||||
negative error number and a short descriptive phrase. If the error is a failed
|
||||
UTF string check, the offset of the start of the failing character and the
|
||||
reason code are also output. Here is an example of an interactive
|
||||
<b>pcre2test</b> run.
|
||||
For any other return, <b>pcre2test</b> outputs the PCRE2 negative error number
|
||||
and a short descriptive phrase. If the error is a failed UTF string check, the
|
||||
code unit offset of the start of the failing character is also output. Here is
|
||||
an example of an interactive <b>pcre2test</b> run.
|
||||
<pre>
|
||||
$ pcre2test
|
||||
PCRE2 version 9.00 2014-05-10
|
||||
|
@ -1110,10 +1112,10 @@ reason code are also output. Here is an example of an interactive
|
|||
No match
|
||||
</pre>
|
||||
Unset capturing substrings that are not followed by one that is set are not
|
||||
returned by <b>pcre2_match()</b>, and are not shown by <b>pcre2test</b>. In the
|
||||
following example, there are two capturing substrings, but when the first data
|
||||
line is matched, the second, unset substring is not shown. An "internal" unset
|
||||
substring is shown as "<unset>", as for the second data line.
|
||||
shown by <b>pcre2test</b> unless the <b>allcaptures</b> modifier is specified. In
|
||||
the following example, there are two capturing substrings, but when the first
|
||||
data line is matched, the second, unset substring is not shown. An "internal"
|
||||
unset substring is shown as "<unset>", as for the second data line.
|
||||
<pre>
|
||||
re> /(a)|(b)/
|
||||
data> a
|
||||
|
@ -1149,8 +1151,8 @@ are output in sequence, like this:
|
|||
1: pp
|
||||
</pre>
|
||||
"No match" is output only if the first match attempt fails. Here is an example
|
||||
of a failure message (the offset 4 that is specified by \>4 is past the end of
|
||||
the subject string):
|
||||
of a failure message (the offset 4 that is specified by the <b>offset</b>
|
||||
modifier is past the end of the subject string):
|
||||
<pre>
|
||||
re> /xyz/
|
||||
data> xyz\=offset=4
|
||||
|
@ -1175,12 +1177,13 @@ the subject where there is at least one match. For example:
|
|||
1: tang
|
||||
2: tan
|
||||
</pre>
|
||||
(Using the normal matching function on this data finds only "tang".) The
|
||||
Using the normal matching function on this data finds only "tang". The
|
||||
longest matching string is always given first (and numbered zero). After a
|
||||
PCRE2_ERROR_PARTIAL return, the output is "Partial match:", followed by the
|
||||
partially matching substring. (Note that this is the entire substring that was
|
||||
partially matching substring. Note that this is the entire substring that was
|
||||
inspected during the partial match; it may include characters before the actual
|
||||
match start if a lookbehind assertion, \K, \b, or \B was involved.)
|
||||
match start if a lookbehind assertion, \b, or \B was involved. (\K is not
|
||||
supported for DFA matching.)
|
||||
</P>
|
||||
<P>
|
||||
If global matching is requested, the search for further matches resumes
|
||||
|
@ -1217,9 +1220,9 @@ documentation.
|
|||
</P>
|
||||
<br><a name="SEC16" href="#TOC1">CALLOUTS</a><br>
|
||||
<P>
|
||||
If the pattern contains any callout requests, <b>pcre2test</b>'s callout function
|
||||
is called during matching. This works with both matching functions. By default,
|
||||
the called function displays the callout number, the start and current
|
||||
If the pattern contains any callout requests, <b>pcre2test</b>'s callout
|
||||
function is called during matching. This works with both matching functions. By
|
||||
default, the called function displays the callout number, the start and current
|
||||
positions in the text at the callout time, and the next pattern item to be
|
||||
tested. For example:
|
||||
<pre>
|
||||
|
@ -1306,7 +1309,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC20" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 14 November 2014
|
||||
Last updated: 23 November 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -115,7 +115,10 @@ VALIDITY OF UTF STRINGS
|
|||
<P>
|
||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
If an invalid UTF string is passed, a negative error code is returned. The
|
||||
code unit offset to the offending character can be extracted from the match
|
||||
data block by calling <b>pcre2_get_startchar()</b>, which is used for this
|
||||
purpose after a UTF error.
|
||||
</P>
|
||||
<P>
|
||||
UTF-16 and UTF-32 strings can indicate their endianness by special code known
|
||||
|
|
|
@ -2057,6 +2057,10 @@ OTHER INFORMATION ABOUT A MATCH
|
|||
value is always the same as ovector[0] because \K does not affect the
|
||||
result of a partial match.
|
||||
|
||||
The startchar field is also used to return the offset of an invalid UTF
|
||||
character when UTF checking fails. Details are given in the pcre2uni-
|
||||
code page.
|
||||
|
||||
|
||||
ERROR RETURNS FROM pcre2_match()
|
||||
|
||||
|
@ -2601,7 +2605,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 21 November 2014
|
||||
Last updated: 23 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -4327,9 +4331,9 @@ PARTIAL MATCHING USING pcre2_dfa_match()
|
|||
|
||||
/dog(sbody)??/
|
||||
|
||||
Whereas the standard functions stop as soon as they find the complete
|
||||
match for "dog", the DFA functions also find the partial match for
|
||||
"dogsbody", and so return that when PCRE2_PARTIAL_HARD is set.
|
||||
Whereas the standard function stops as soon as it finds the complete
|
||||
match for "dog", the DFA function also finds the partial match for
|
||||
"dogsbody", and so returns that when PCRE2_PARTIAL_HARD is set.
|
||||
|
||||
|
||||
PARTIAL MATCHING AND WORD BOUNDARIES
|
||||
|
@ -4681,8 +4685,10 @@ VALIDITY OF UTF STRINGS
|
|||
|
||||
When the PCRE2_UTF option is set, the strings passed as patterns and
|
||||
subjects are (by default) checked for validity on entry to the relevant
|
||||
functions. If an invalid UTF string is passed, an error return is
|
||||
given.
|
||||
functions. If an invalid UTF string is passed, a negative error code
|
||||
is returned. The code unit offset to the offending character can be
|
||||
extracted from the match data block by calling pcre2_get_startchar(),
|
||||
which is used for this purpose after a UTF error.
|
||||
|
||||
UTF-16 and UTF-32 strings can indicate their endianness by special code
|
||||
known as a byte-order mark (BOM). The PCRE2 functions do not handle
|
||||
|
|
|
@ -169,9 +169,9 @@ the string "dog" matched against the ungreedy pattern shown above:
|
|||
.sp
|
||||
/dog(sbody)??/
|
||||
.sp
|
||||
Whereas the standard functions stop as soon as they find the complete match for
|
||||
"dog", the DFA functions also find the partial match for "dogsbody", and so
|
||||
return that when PCRE2_PARTIAL_HARD is set.
|
||||
Whereas the standard function stops as soon as it finds the complete match for
|
||||
"dog", the DFA function also finds the partial match for "dogsbody", and so
|
||||
returns that when PCRE2_PARTIAL_HARD is set.
|
||||
.
|
||||
.
|
||||
.SH "PARTIAL MATCHING AND WORD BOUNDARIES"
|
||||
|
|
|
@ -188,7 +188,7 @@ DESCRIPTION
|
|||
followed by any number of subject lines to be matched against that pat-
|
||||
tern. In between sets of test data, command lines that begin with a
|
||||
hash (#) character may appear. This file format, with some restric-
|
||||
tions, can also be processed by the perltest.pl script that is distrib-
|
||||
tions, can also be processed by the perltest.sh script that is distrib-
|
||||
uted with PCRE2 as a means of checking that the behaviour of PCRE2 and
|
||||
Perl is the same.
|
||||
|
||||
|
@ -232,11 +232,11 @@ COMMAND LINES
|
|||
#perltest
|
||||
|
||||
The appearance of this line causes all subsequent modifier settings to
|
||||
be checked for compatibility with the perltest.pl script, which is used
|
||||
be checked for compatibility with the perltest.sh script, which is used
|
||||
to confirm that Perl gives the same results as PCRE2. Also, apart from
|
||||
comment lines, none of the other command lines are permitted, because
|
||||
they and many of the modifiers are specific to pcre2test, and should
|
||||
not be used in test files that are also processed by perltest.pl. The
|
||||
not be used in test files that are also processed by perltest.sh. The
|
||||
#perltest command helps detect tests that are accidentally put in the
|
||||
wrong file.
|
||||
|
||||
|
@ -255,8 +255,7 @@ MODIFIER SYNTAX
|
|||
valid for one or the other only. Each modifier has a long name, for
|
||||
example "anchored", and some of them must be followed by an equals sign
|
||||
and a value, for example, "offset=12". Modifiers that do not take val-
|
||||
ues may be preceded by a minus sign to turn off a previous default set-
|
||||
ting.
|
||||
ues may be preceded by a minus sign to turn off a previous setting.
|
||||
|
||||
A few of the more common modifiers can also be specified as single let-
|
||||
ters, for example "i" for "caseless". In documentation, following the
|
||||
|
@ -361,7 +360,7 @@ SUBJECT LINE SYNTAX
|
|||
is converted to "abcabcabcabc". This feature does not support nesting.
|
||||
To include a closing square bracket in the characters, code it as \x5D.
|
||||
|
||||
A backslash followed by an equals sign marke the end of the subject
|
||||
A backslash followed by an equals sign marks the end of the subject
|
||||
string and the start of a modifier list. For example:
|
||||
|
||||
abc\=notbol,notempty
|
||||
|
@ -447,8 +446,8 @@ PATTERN MODIFIERS
|
|||
specified when PCRE2 is built, with the default default being Unicode.
|
||||
|
||||
The newline modifier specifies which characters are to be interpreted
|
||||
as newlines, both in the pattern and (by default) in subject lines. The
|
||||
type must be one of CR, LF, CRLF, ANYCRLF, or ANY.
|
||||
as newlines, both in the pattern and in subject lines. The type must be
|
||||
one of CR, LF, CRLF, ANYCRLF, or ANY (in upper or lower case).
|
||||
|
||||
Information about a pattern
|
||||
|
||||
|
@ -463,8 +462,8 @@ PATTERN MODIFIERS
|
|||
ments.
|
||||
|
||||
The fullbincode modifier, by contrast, does include length and offset
|
||||
values. This is used in a few special tests and is also useful for one-
|
||||
off tests.
|
||||
values. This is used in a few special tests that run only for specific
|
||||
code unit widths and link sizes, and is also useful for one-off tests.
|
||||
|
||||
The info modifier requests information about the compiled pattern
|
||||
(whether it is anchored, has a fixed first character, and so on). The
|
||||
|
@ -486,14 +485,15 @@ PATTERN MODIFIERS
|
|||
Last code unit = 'c' (caseless)
|
||||
Subject length lower bound = 3
|
||||
|
||||
"Compile options" are those specified to the compile function; "overall
|
||||
options" have added options that are taken or deduced from the pattern.
|
||||
If both sets of options are the same, just a single "options" line is
|
||||
output. "First code unit" is where any match must start; if there is
|
||||
more than one they are listed as "starting code units". "Last code
|
||||
unit" is the last literal code unit that must be present in any match.
|
||||
This is not necessarily the last character. These lines are omitted if
|
||||
no starting or ending code units are recorded.
|
||||
"Compile options" are those specified by modifiers; "overall options"
|
||||
have added options that are taken or deduced from the pattern. If both
|
||||
sets of options are the same, just a single "options" line is output;
|
||||
if there are no options, the line is omitted. "First code unit" is
|
||||
where any match must start; if there is more than one they are listed
|
||||
as "starting code units". "Last code unit" is the last literal code
|
||||
unit that must be present in any match. This is not necessarily the
|
||||
last character. These lines are omitted if no starting or ending code
|
||||
units are recorded.
|
||||
|
||||
Specifying a pattern in hex
|
||||
|
||||
|
@ -504,14 +504,14 @@ PATTERN MODIFIERS
|
|||
/ab 32 59/hex
|
||||
|
||||
This feature is provided as a way of creating patterns that contain
|
||||
binary zero characters. By default, pcre2test passes patterns as zero-
|
||||
terminated strings to pcre2_compile(), giving the length as
|
||||
PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal,
|
||||
the actual length of the pattern is passed.
|
||||
binary zero and other non-printing characters. By default, pcre2test
|
||||
passes patterns as zero-terminated strings to pcre2_compile(), giving
|
||||
the length as PCRE2_ZERO_TERMINATED. However, for patterns specified in
|
||||
hexadecimal, the actual length of the pattern is passed.
|
||||
|
||||
JIT compilation
|
||||
|
||||
The /jit modifier may optionally be followed by and equals sign and a
|
||||
The /jit modifier may optionally be followed by an equals sign and a
|
||||
number in the range 0 to 7:
|
||||
|
||||
0 disable JIT
|
||||
|
@ -540,7 +540,7 @@ PATTERN MODIFIERS
|
|||
jitverify is specified without jit, jit=7 is assumed. If JIT compila-
|
||||
tion is successful when jitverify is set, the text "(JIT)" is added to
|
||||
the first output line after a match or non match when JIT-compiled code
|
||||
was actually used.
|
||||
was actually used in the match.
|
||||
|
||||
Setting a locale
|
||||
|
||||
|
@ -609,10 +609,11 @@ PATTERN MODIFIERS
|
|||
|
||||
Using alternative character tables
|
||||
|
||||
The /tables modifier must be followed by a single digit. It causes a
|
||||
specific set of built-in character tables to be passed to pcre2_com-
|
||||
pile(). This is used in the PCRE2 tests to check behaviour with differ-
|
||||
ent character tables. The digit specifies the tables as follows:
|
||||
The value specified for the /tables modifier must be one of the digits
|
||||
0, 1, or 2. It causes a specific set of built-in character tables to be
|
||||
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
||||
haviour with different character tables. The digit specifies the tables
|
||||
as follows:
|
||||
|
||||
0 do not pass any special character tables
|
||||
1 the default ASCII tables, as distributed in
|
||||
|
@ -710,14 +711,14 @@ SUBJECT MODIFIERS
|
|||
|
||||
Showing more text
|
||||
|
||||
The aftertext modifier requests that as well as outputting the sub-
|
||||
string that matched the entire pattern, pcre2test should in addition
|
||||
output the remainder of the subject string. This is useful for tests
|
||||
where the subject contains multiple copies of the same substring. The
|
||||
allaftertext modifier requests the same action for captured substrings
|
||||
as well as the main matched substring. In each case the remainder is
|
||||
output on the following line with a plus character following the cap-
|
||||
ture number.
|
||||
The aftertext modifier requests that as well as outputting the part of
|
||||
the subject string that matched the entire pattern, pcre2test should in
|
||||
addition output the remainder of the subject string. This is useful for
|
||||
tests where the subject contains multiple copies of the same substring.
|
||||
The allaftertext modifier requests the same action for captured sub-
|
||||
strings as well as the main matched substring. In each case the remain-
|
||||
der is output on the following line with a plus character following the
|
||||
capture number.
|
||||
|
||||
The allusedtext modifier requests that all the text that was consulted
|
||||
during a successful pattern match by the interpreter should be shown.
|
||||
|
@ -735,7 +736,8 @@ SUBJECT MODIFIERS
|
|||
<<< >>>
|
||||
|
||||
This shows that the matched string is "abc", with the preceding and
|
||||
following strings "pqr" and "xyz" also consulted during the match.
|
||||
following strings "pqr" and "xyz" having been consulted during the
|
||||
match (when processing the assertions).
|
||||
|
||||
The startchar modifier requests that the starting character for the
|
||||
match be indicated, if it is different to the start of the matched
|
||||
|
@ -784,9 +786,9 @@ SUBJECT MODIFIERS
|
|||
difference between global and altglobal is that the former uses the
|
||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||
searching at a new point within the entire string (which is what Perl
|
||||
does), whereas the latter passes over a shortened substring. This makes
|
||||
a difference to the matching process if the pattern begins with a look-
|
||||
behind assertion (including \b or \B).
|
||||
does), whereas the latter passes over a shortened subject. This makes a
|
||||
difference to the matching process if the pattern begins with a lookbe-
|
||||
hind assertion (including \b or \B).
|
||||
|
||||
If an empty string is matched, the next match is done with the
|
||||
PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set, in order to search
|
||||
|
@ -796,7 +798,7 @@ SUBJECT MODIFIERS
|
|||
/g modifier or the split() function. Normally, the start offset is
|
||||
advanced by one character, but if the newline convention recognizes
|
||||
CRLF as a newline, and the current character is CR followed by LF, an
|
||||
advance of two is used.
|
||||
advance of two characters occurs.
|
||||
|
||||
Testing substring extraction functions
|
||||
|
||||
|
@ -807,9 +809,9 @@ SUBJECT MODIFIERS
|
|||
|
||||
abcd\=copy=1,copy=3,get=G1
|
||||
|
||||
If the #subject command is used to set default copy and get lists,
|
||||
these can be unset by specifying a negative number for numbered groups
|
||||
and an empty name for named groups.
|
||||
If the #subject command is used to set default copy and/or get lists,
|
||||
these can be unset by specifying a negative number to cancel all num-
|
||||
bered groups and an empty name to cancel all named groups.
|
||||
|
||||
The getall modifier tests pcre2_substring_list_get(), which extracts
|
||||
all captured substrings.
|
||||
|
@ -818,7 +820,8 @@ SUBJECT MODIFIERS
|
|||
by the convenience functions are output with C, G, or L after the
|
||||
string number instead of a colon. This is in addition to the normal
|
||||
full list. The string length (that is, the return from the extraction
|
||||
function) is given in parentheses after each substring.
|
||||
function) is given in parentheses after each substring, followed by the
|
||||
name when the extraction was by name.
|
||||
|
||||
Testing the substitution function
|
||||
|
||||
|
@ -973,9 +976,8 @@ DEFAULT OUTPUT FROM pcre2test
|
|||
|
||||
For any other return, pcre2test outputs the PCRE2 negative error number
|
||||
and a short descriptive phrase. If the error is a failed UTF string
|
||||
check, the offset of the start of the failing character and the reason
|
||||
code are also output. Here is an example of an interactive pcre2test
|
||||
run.
|
||||
check, the code unit offset of the start of the failing character is
|
||||
also output. Here is an example of an interactive pcre2test run.
|
||||
|
||||
$ pcre2test
|
||||
PCRE2 version 9.00 2014-05-10
|
||||
|
@ -988,8 +990,8 @@ DEFAULT OUTPUT FROM pcre2test
|
|||
No match
|
||||
|
||||
Unset capturing substrings that are not followed by one that is set are
|
||||
not returned by pcre2_match(), and are not shown by pcre2test. In the
|
||||
following example, there are two capturing substrings, but when the
|
||||
not shown by pcre2test unless the allcaptures modifier is specified. In
|
||||
the following example, there are two capturing substrings, but when the
|
||||
first data line is matched, the second, unset substring is not shown.
|
||||
An "internal" unset substring is shown as "<unset>", as for the second
|
||||
data line.
|
||||
|
@ -1028,8 +1030,8 @@ DEFAULT OUTPUT FROM pcre2test
|
|||
1: pp
|
||||
|
||||
"No match" is output only if the first match attempt fails. Here is an
|
||||
example of a failure message (the offset 4 that is specified by \>4 is
|
||||
past the end of the subject string):
|
||||
example of a failure message (the offset 4 that is specified by the
|
||||
offset modifier is past the end of the subject string):
|
||||
|
||||
re> /xyz/
|
||||
data> xyz\=offset=4
|
||||
|
@ -1053,13 +1055,13 @@ OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION
|
|||
1: tang
|
||||
2: tan
|
||||
|
||||
(Using the normal matching function on this data finds only "tang".)
|
||||
The longest matching string is always given first (and numbered zero).
|
||||
Using the normal matching function on this data finds only "tang". The
|
||||
longest matching string is always given first (and numbered zero).
|
||||
After a PCRE2_ERROR_PARTIAL return, the output is "Partial match:",
|
||||
followed by the partially matching substring. (Note that this is the
|
||||
followed by the partially matching substring. Note that this is the
|
||||
entire substring that was inspected during the partial match; it may
|
||||
include characters before the actual match start if a lookbehind asser-
|
||||
tion, \K, \b, or \B was involved.)
|
||||
tion, \b, or \B was involved. (\K is not supported for DFA matching.)
|
||||
|
||||
If global matching is requested, the search for further matches resumes
|
||||
at the end of the longest match. For example:
|
||||
|
@ -1183,5 +1185,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 14 November 2014
|
||||
Last updated: 23 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
|
|
|
@ -18,10 +18,10 @@ to set the macro values. In this case, you do not have to set -DHAVE_CONFIG_H,
|
|||
but if you do, default values will be taken from config.h for non-boolean
|
||||
macros that are not defined on the command line.
|
||||
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be
|
||||
defined (conventionally to 1) for TRUE, or not defined at all for FALSE. All
|
||||
such macros are listed as a commented #undef in config.h.generic. Macros such
|
||||
as MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
Boolean macros such as HAVE_STDLIB_H and SUPPORT_PCRE2_8 should either be defined
|
||||
(conventionally to 1) for TRUE, or not defined at all for FALSE. All such
|
||||
macros are listed as a commented #undef in config.h.generic. Macros such as
|
||||
MATCH_LIMIT, whose actual value is relevant, have defaults defined, but are
|
||||
surrounded by #ifndef/#endif lines so that the value can be overridden by -D.
|
||||
|
||||
PCRE2 uses memmove() if HAVE_MEMMOVE is defined; otherwise it uses bcopy() if
|
||||
|
@ -201,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.00-DEV"
|
||||
#define PACKAGE_STRING "PCRE2 10.00-RC1"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -210,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.00-DEV"
|
||||
#define PACKAGE_VERSION "10.00-RC1"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -288,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.00-DEV"
|
||||
#define VERSION "10.00-RC1"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 00
|
||||
#define PCRE2_PRERELEASE -DEV
|
||||
#define PCRE2_DATE 2014-99-99
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2014-11-24
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -125,8 +125,8 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
|
||||
|
||||
/* These are for pcre2_match() and pcre2_dfa_match(). Note that PCRE2_ANCHORED,
|
||||
PCRE2_NO_START_OPTIMIZE, and PCRE2_NO_UTF_CHECK can also be passed to these
|
||||
functions, so take care not to define synonyms by mistake. */
|
||||
and PCRE2_NO_UTF_CHECK can also be passed to these functions, so take care not
|
||||
to define synonyms by mistake. */
|
||||
|
||||
#define PCRE2_NOTBOL 0x00000001u
|
||||
#define PCRE2_NOTEOL 0x00000002u
|
||||
|
@ -140,6 +140,10 @@ functions, so take care not to define synonyms by mistake. */
|
|||
#define PCRE2_DFA_RESTART 0x00000040u
|
||||
#define PCRE2_DFA_SHORTEST 0x00000080u
|
||||
|
||||
/* This is an additional option for pcre2_substitute(). */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
|
||||
|
||||
/* Newline and \R settings, for use in compile contexts. The newline values
|
||||
must be kept in step with values set in config.h and both sets must all be
|
||||
greater than zero. */
|
||||
|
@ -202,24 +206,25 @@ context functions. */
|
|||
#define PCRE2_ERROR_BADMODE (-32)
|
||||
#define PCRE2_ERROR_BADOFFSET (-33)
|
||||
#define PCRE2_ERROR_BADOPTION (-34)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-35)
|
||||
#define PCRE2_ERROR_CALLOUT (-36) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-37)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-38)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-39)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-40)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-41)
|
||||
#define PCRE2_ERROR_INTERNAL (-42)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-43)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-44)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-45)
|
||||
#define PCRE2_ERROR_NOMEMORY (-46)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-47)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-48)
|
||||
#define PCRE2_ERROR_NULL (-49)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-50)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-51)
|
||||
#define PCRE2_ERROR_UNSET (-52)
|
||||
#define PCRE2_ERROR_BADREPLACEMENT (-35)
|
||||
#define PCRE2_ERROR_BADUTFOFFSET (-36)
|
||||
#define PCRE2_ERROR_CALLOUT (-37) /* Never used by PCRE2 itself */
|
||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-42)
|
||||
#define PCRE2_ERROR_INTERNAL (-43)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-44)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-46)
|
||||
#define PCRE2_ERROR_NOMEMORY (-47)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-48)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNSET (-53)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -406,7 +411,8 @@ PCRE2_EXP_DECL \
|
|||
pcre2_match_data *pcre2_match_data_create(uint32_t, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(pcre2_code *, \
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(\
|
||||
const pcre2_code *, \
|
||||
pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL int pcre2_dfa_match(const pcre2_code *, PCRE2_SPTR, \
|
||||
PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
|
@ -447,19 +453,28 @@ PCRE2_EXP_DECL int pcre2_substring_list_get(pcre2_match_data *, \
|
|||
PCRE2_UCHAR ***, PCRE2_SIZE **);
|
||||
|
||||
|
||||
/* Convenience function for match + substitute. */
|
||||
|
||||
#define PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_EXP_DECL int pcre2_substitute(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, \
|
||||
PCRE2_SIZE *);
|
||||
|
||||
|
||||
/* Functions for JIT processing */
|
||||
|
||||
#define PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_EXP_DECL int pcre2_jit_compile(pcre2_code *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_jit_match(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE, uint32_t, \
|
||||
pcre2_match_data *, pcre2_match_context *, \
|
||||
pcre2_jit_stack *); \
|
||||
pcre2_match_data *, pcre2_match_context *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_free_unused_memory(pcre2_general_context *); \
|
||||
PCRE2_EXP_DECL \
|
||||
pcre2_jit_stack *pcre2_jit_stack_create(pcre2_general_context *, \
|
||||
PCRE2_SIZE, PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(const pcre2_code *, \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_assign(pcre2_match_context *, \
|
||||
pcre2_jit_callback, void *); \
|
||||
PCRE2_EXP_DECL void pcre2_jit_stack_free(pcre2_jit_stack *);
|
||||
|
||||
|
@ -551,6 +566,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_recursion_limit PCRE2_SUFFIX(pcre2_set_recursion_limit_)
|
||||
#define pcre2_set_recursion_memory_management PCRE2_SUFFIX(pcre2_set_recursion_memory_management_)
|
||||
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
|
||||
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
|
||||
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)
|
||||
#define pcre2_substring_free PCRE2_SUFFIX(pcre2_substring_free_)
|
||||
|
@ -577,6 +593,7 @@ PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
|||
PCRE2_COMPILE_FUNCTIONS \
|
||||
PCRE2_PATTERN_INFO_FUNCTIONS \
|
||||
PCRE2_MATCH_FUNCTIONS \
|
||||
PCRE2_SUBSTITUTE_FUNCTION \
|
||||
PCRE2_SUBSTRING_FUNCTIONS \
|
||||
PCRE2_JIT_FUNCTIONS \
|
||||
PCRE2_OTHER_FUNCTIONS
|
||||
|
|
|
@ -1570,13 +1570,13 @@ enum {
|
|||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
definitions that follow must also be updated to match. There are also tables
|
||||
called "opcode_possessify" in pcre_compile.c and "coptable" and "poptable" in
|
||||
pcre_dfa_exec.c that must be updated. */
|
||||
called "opcode_possessify" in pcre2_compile.c and "coptable" and "poptable" in
|
||||
pcre2_dfa_exec.c that must be updated. */
|
||||
|
||||
|
||||
/* This macro defines textual names for all the opcodes. These are used only
|
||||
for debugging, and some of them are only partial names. The macro is referenced
|
||||
only in pcre_printint.c, which fills out the full names in many cases (and in
|
||||
only in pcre2_printint.c, which fills out the full names in many cases (and in
|
||||
some cases doesn't actually use these names at all). */
|
||||
|
||||
#define OP_NAME_LIST \
|
||||
|
|
Loading…
Reference in New Issue