Tidy a lot of files (remove trailing spaces)
This commit is contained in:
parent
4352f00bb9
commit
c3799e750f
|
@ -382,21 +382,21 @@ SET(PCRE2_SOURCES
|
|||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||
src/pcre2_compile.c
|
||||
src/pcre2_config.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_context.c
|
||||
src/pcre2_dfa_match.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_error.c
|
||||
src/pcre2_jit_compile.c
|
||||
src/pcre2_jit_match.c
|
||||
src/pcre2_jit_misc.c
|
||||
src/pcre2_maketables.c
|
||||
src/pcre2_match.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_match_data.c
|
||||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_substring.c
|
||||
src/pcre2_tables.c
|
||||
src/pcre2_ucd.c
|
||||
src/pcre2_valid_utf.c
|
||||
|
@ -462,11 +462,11 @@ SET(targets)
|
|||
IF(PCRE2_BUILD_PCRE2_8)
|
||||
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-8
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2-8)
|
||||
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||
SET_PROPERTY(TARGET pcre2posix
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2posix)
|
||||
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
|
||||
|
||||
|
@ -503,7 +503,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
|
|||
IF(PCRE2_BUILD_PCRE2_32)
|
||||
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||
SET_PROPERTY(TARGET pcre2-32
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||
SET(targets ${targets} pcre2-32)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
|
@ -521,7 +521,7 @@ ENDIF(PCRE2_BUILD_PCRE2_32)
|
|||
IF(PCRE2_BUILD_PCRE2GREP)
|
||||
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
|
||||
SET_PROPERTY(TARGET pcre2grep
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||
SET(targets ${targets} pcre2grep)
|
||||
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
|
||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||
|
|
38
ChangeLog
38
ChangeLog
|
@ -5,41 +5,41 @@ Version 10.0 xx-xxxx-2014
|
|||
-------------------------
|
||||
|
||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
item 20 for release 8.36.
|
||||
|
||||
The code of the library was heavily revised as part of the new API
|
||||
implementation. Details of each and every modification were not individually
|
||||
logged. In addition to the API changes, the following changes were made. They
|
||||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
The code of the library was heavily revised as part of the new API
|
||||
implementation. Details of each and every modification were not individually
|
||||
logged. In addition to the API changes, the following changes were made. They
|
||||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
behaviour that were implemented after the code had been forked.
|
||||
|
||||
1. The test program, now called pcre2test, was re-specified and almost
|
||||
1. The test program, now called pcre2test, was re-specified and almost
|
||||
completely re-written. Its input is not compatible with input for pcretest.
|
||||
|
||||
2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
|
||||
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
||||
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
||||
matched by that pattern.
|
||||
|
||||
3. For the benefit of those who use PCRE2 via some other application, that is,
|
||||
not writing the function calls themselves, it is possible to check the PCRE2
|
||||
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
|
||||
3. For the benefit of those who use PCRE2 via some other application, that is,
|
||||
not writing the function calls themselves, it is possible to check the PCRE2
|
||||
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
|
||||
string such as "yesno".
|
||||
|
||||
4. There are case-equivalent Unicode characters whose encodings use different
|
||||
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
||||
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
||||
a group containing one of these characters was greedily repeated, and during
|
||||
4. There are case-equivalent Unicode characters whose encodings use different
|
||||
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
||||
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
||||
a group containing one of these characters was greedily repeated, and during
|
||||
the match a backtrack occurred, the subject might be backtracked by the wrong
|
||||
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
||||
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
||||
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
||||
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
||||
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
|
||||
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
||||
This bug has been fixed; the new code is slower, but it is used only when the
|
||||
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
||||
This bug has been fixed; the new code is slower, but it is used only when the
|
||||
strings matched by the repetition are not all the same length.
|
||||
|
||||
5. A pattern such as /()a/ was not setting the "first character must be 'a'"
|
||||
information. This applied to any pattern with a group that matched no
|
||||
information. This applied to any pattern with a group that matched no
|
||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||
|
||||
****
|
||||
|
|
2
NEWS
2
NEWS
|
@ -5,7 +5,7 @@ Version 10.0 xx-xxxx-2014
|
|||
-------------------------
|
||||
|
||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||
item 20 for release 8.36.
|
||||
|
||||
****
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document has been converted from the PCRE1 document, but is not yet
|
||||
complete. I have removed a number of quite old sections about building in
|
||||
various environments, as they applied only to PCRE1 and are probably out of
|
||||
This document has been converted from the PCRE1 document, but is not yet
|
||||
complete. I have removed a number of quite old sections about building in
|
||||
various environments, as they applied only to PCRE1 and are probably out of
|
||||
date.
|
||||
|
||||
|
||||
|
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
|
|||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_jit_compile.c
|
||||
|
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_pattern_info.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substring.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
|
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
|
|||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the pcre2posix.h file and then
|
||||
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
|
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
|||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
|
@ -394,9 +394,9 @@ required. For details, please see this web site:
|
|||
There is also a mirror here:
|
||||
|
||||
http://www.vsoft-software.com/downloads.html
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
==========================
|
||||
Last Updated: 28 September 2014
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
# README & NON-AUTOTOOLS-BUILD
|
||||
# These files are copied into the doc/html directory, with .txt
|
||||
# extensions so that they can be hyperlinked from the HTML
|
||||
# extensions so that they can be hyperlinked from the HTML
|
||||
# documentation, because some people just go to the HTML without
|
||||
# looking for text files.
|
||||
|
||||
|
@ -71,7 +71,7 @@ for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
|
|||
# pcre2syntax \
|
||||
# pcre2precompile pcre2perform pcre2posix pcre2sample \
|
||||
# pcre2stack ; do
|
||||
|
||||
|
||||
echo " Processing $file.3"
|
||||
nroff -c -man $file.3 >$file.rawtxt
|
||||
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
||||
|
@ -168,17 +168,13 @@ cd ..
|
|||
echo Documentation done
|
||||
if [ "$1" = "doc" ] ; then exit; fi
|
||||
|
||||
# FIXME pro tem only do docs
|
||||
exit
|
||||
|
||||
# These files are detrailed; do not detrail the test data because there may be
|
||||
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
||||
# line endings and the detrail script removes all trailing white space. The
|
||||
# configure files are also omitted from the detrailing.
|
||||
# configure files are also omitted from the detrailing.
|
||||
|
||||
files="\
|
||||
Makefile.am \
|
||||
Makefile.in \
|
||||
configure.ac \
|
||||
README \
|
||||
LICENCE \
|
||||
|
@ -195,54 +191,45 @@ files="\
|
|||
RunGrepTest \
|
||||
RunTest \
|
||||
pcre2-config.in \
|
||||
libpcre.pc.in \
|
||||
libpcre16.pc.in \
|
||||
libpcre32.pc.in \
|
||||
libpcreposix.pc.in \
|
||||
libpcrecpp.pc.in \
|
||||
config.h.in \
|
||||
pcre2_chartables.c.dist \
|
||||
pcre2demo.c \
|
||||
pcre2grep.c \
|
||||
pcre2test.c \
|
||||
dftables.c \
|
||||
pcre2posix.c \
|
||||
pcre2posix.h \
|
||||
pcre2.h.in \
|
||||
pcre2_internal.h \
|
||||
pcre2_byte_order.c \
|
||||
pcre2_compile.c \
|
||||
pcre2_config.c \
|
||||
pcre2_dfa_exec.c \
|
||||
pcre2_exec.c \
|
||||
pcre2_fullinfo.c \
|
||||
pcre2_get.c \
|
||||
pcre2_globals.c \
|
||||
pcre2_jit_compile.c \
|
||||
pcre2_jit_test.c \
|
||||
pcre2_maketables.c \
|
||||
pcre2_newline.c \
|
||||
pcre2_ord2utf8.c \
|
||||
pcre16_ord2utf16.c \
|
||||
pcre32_ord2utf32.c \
|
||||
pcre2_printint.c \
|
||||
pcre2_refcount.c \
|
||||
pcre2_string_utils.c \
|
||||
pcre2_study.c \
|
||||
pcre2_tables.c \
|
||||
pcre2_valid_utf8.c \
|
||||
pcre2_version.c \
|
||||
pcre2_xclass.c \
|
||||
pcre16_utf16_utils.c \
|
||||
pcre32_utf32_utils.c \
|
||||
pcre16_valid_utf16.c \
|
||||
pcre32_valid_utf32.c \
|
||||
perltest.pl \
|
||||
ucp.h \
|
||||
makevp.bat \
|
||||
pcre.def \
|
||||
libpcre.def \
|
||||
libpcreposix.def"
|
||||
libpcre2-8.pc.in \
|
||||
libpcre2-16.pc.in \
|
||||
libpcre2-32.pc.in \
|
||||
libpcre2-posix.pc.in \
|
||||
src/dftables.c \
|
||||
src/pcre2.h.in \
|
||||
src/pcre2_auto_possess.c \
|
||||
src/pcre2_compile.c \
|
||||
src/pcre2_config.c \
|
||||
src/pcre2_context.c \
|
||||
src/pcre2_dfa_match.c \
|
||||
src/pcre2_error.c \
|
||||
src/pcre2_internal.h \
|
||||
src/pcre2_intmodedep.h \
|
||||
src/pcre2_jit_compile.c \
|
||||
src/pcre2_jit_match.c \
|
||||
src/pcre2_jit_misc.c \
|
||||
src/pcre2_jit_test.c \
|
||||
src/pcre2_maketables.c \
|
||||
src/pcre2_match.c \
|
||||
src/pcre2_match_data.c \
|
||||
src/pcre2_newline.c \
|
||||
src/pcre2_ord2utf.c \
|
||||
src/pcre2_pattern_info.c \
|
||||
src/pcre2_printint.c \
|
||||
src/pcre2_string_utils.c \
|
||||
src/pcre2_study.c \
|
||||
src/pcre2_substring.c \
|
||||
src/pcre2_tables.c \
|
||||
src/pcre2_ucd.c \
|
||||
src/pcre2_ucp.h \
|
||||
src/pcre2_valid_utf.c \
|
||||
src/pcre2_xclass.c \
|
||||
src/pcre2demo.c \
|
||||
src/pcre2grep.c \
|
||||
src/pcre2posix.c \
|
||||
src/pcre2posix.h \
|
||||
src/pcre2test.c"
|
||||
|
||||
echo Detrailing
|
||||
perl ./Detrail $files doc/p* doc/html/*
|
||||
|
|
46
README
46
README
|
@ -1,7 +1,7 @@
|
|||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
|
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
|||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
|
@ -41,7 +41,7 @@ The PCRE2 APIs
|
|||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||
there are as yet no C++ wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
|
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
|||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
|
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
|
|||
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
||||
configure one library with UTF support and the other without in the same
|
||||
configuration.
|
||||
|
||||
|
||||
Even when --enable-unicode is included, the use of a UTF encoding still has
|
||||
to be enabled by an option at run time. When PCRE2 is compiled with this
|
||||
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
||||
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
||||
--enable-ebcdic at the same time.
|
||||
|
||||
|
||||
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
||||
includes support for the \P, \p, and \X sequences that recognize Unicode
|
||||
character properties. However, only the basic two-letter properties such as
|
||||
|
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
|
|||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
|
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
|
|||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
|
@ -452,7 +452,7 @@ prints the version number, and
|
|||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
|
@ -593,7 +593,7 @@ bug in PCRE2.
|
|||
|
||||
The third set of tests checks pcre2_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The script uses the "locale" command to check for the
|
||||
default tables. The script uses the "locale" command to check for the
|
||||
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
||||
that it finds. If the "locale" command fails, or if its output doesn't include
|
||||
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
|||
matches any one of them.
|
||||
|
||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||
being compatible with the perltest.pl script, and the fifth checking
|
||||
being compatible with the perltest.pl script, and the fifth checking
|
||||
PCRE2-specific things.
|
||||
|
||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||
|
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
|
|||
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
||||
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
||||
generate different output in 8-bit mode. Each pair are for general cases and
|
||||
Unicode support, respectively. The thirteenth test checks the handling of
|
||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||
Unicode support, respectively. The thirteenth test checks the handling of
|
||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||
modes.
|
||||
|
||||
The fourteenth test is run only when JIT support is not available, and the
|
||||
|
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
|
|||
JIT-specific features such as information output from pcre2test about JIT
|
||||
compilation.
|
||||
|
||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||
respectively.
|
||||
|
||||
|
||||
|
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
|
|||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
|
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
|
|||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_exec.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
|
|
|
@ -23,7 +23,7 @@ pcre2grep=$builddir/pcre2grep
|
|||
if [ ! -x $pcre2grep ] ; then
|
||||
echo "** $pcre2grep does not exist or is not execuatble."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
valgrind=
|
||||
while [ $# -gt 0 ] ; do
|
||||
|
|
2
RunTest
2
RunTest
|
@ -126,7 +126,7 @@ fi
|
|||
|
||||
checkresult()
|
||||
{
|
||||
if [ $1 -ne 0 ] ; then
|
||||
if [ $1 -ne 0 ] ; then
|
||||
echo "** pcre2test failed - check testtry"
|
||||
exit 1
|
||||
fi
|
||||
|
|
16
configure.ac
16
configure.ac
|
@ -106,7 +106,7 @@ AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
|||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||
then
|
||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||
exit 1
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Handle --disable-pcre2-8 (enabled by default)
|
||||
|
@ -512,7 +512,7 @@ if test "$enable_jit" = "yes"; then
|
|||
CC="$PTHREAD_CC"
|
||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||
LIBS="$PTHREAD_LIBS $LIBS"
|
||||
fi
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_JIT], [], [
|
||||
Define to any value to enable support for Just-In-Time compiling.])
|
||||
else
|
||||
|
@ -538,7 +538,7 @@ if test "$enable_stack_for_recursion" = "no"; then
|
|||
matching. This can sometimes be a problem on systems that have
|
||||
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
||||
version that doesn't use recursion in the match() function; instead
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
it creates its own stack by steam using memory from the heap. For more
|
||||
detail, see the comments and other stuff just above the match() function.])
|
||||
fi
|
||||
|
||||
|
@ -559,8 +559,8 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then
|
|||
with_pcre2grep_bufsize="8192"
|
||||
else
|
||||
if test $? -gt 1 ; then
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||
|
@ -579,9 +579,9 @@ elif test "$enable_pcre2test_libreadline" = "yes"; then
|
|||
fi
|
||||
|
||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
The value of NEWLINE_DEFAULT determines the default newline character
|
||||
sequence. PCRE2 client programs can override this by selecting other values
|
||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||
and 5 (ANYCRLF).])
|
||||
|
||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
Building PCRE2 without using autotools
|
||||
--------------------------------------
|
||||
|
||||
This document has been converted from the PCRE1 document, but is not yet
|
||||
complete. I have removed a number of quite old sections about building in
|
||||
various environments, as they applied only to PCRE1 and are probably out of
|
||||
This document has been converted from the PCRE1 document, but is not yet
|
||||
complete. I have removed a number of quite old sections about building in
|
||||
various environments, as they applied only to PCRE1 and are probably out of
|
||||
date.
|
||||
|
||||
|
||||
|
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
|
|||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_chartables.c
|
||||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_context.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_jit_compile.c
|
||||
|
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_pattern_info.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
pcre2_substring.c
|
||||
pcre2_substring.c
|
||||
pcre2_tables.c
|
||||
pcre2_ucd.c
|
||||
pcre2_valid_utf.c
|
||||
|
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
|
|||
|
||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||
|
||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||
8-bit library), ensure that you have the pcre2posix.h file and then
|
||||
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||
|
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
|||
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||
directories.
|
||||
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
The following instructions were contributed by a PCRE1 user, but they should
|
||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||
event that errors do occur, it is recommended that you delete the CMake cache
|
||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||
|
@ -394,9 +394,9 @@ required. For details, please see this web site:
|
|||
There is also a mirror here:
|
||||
|
||||
http://www.vsoft-software.com/downloads.html
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
|
||||
==========================
|
||||
Last Updated: 28 September 2014
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
README file for PCRE2 (Perl-compatible regular expression library)
|
||||
------------------------------------------------------------------
|
||||
|
||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||
API. The latest release of PCRE2 is always available in three alternative
|
||||
formats from:
|
||||
|
||||
|
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
|||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
There is a mailing list for discussion about the development of PCRE (both the
|
||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||
subscribe or manage your subscription here:
|
||||
|
||||
|
@ -41,7 +41,7 @@ The PCRE2 APIs
|
|||
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||
there are as yet no C++ wrappers.
|
||||
|
||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||
|
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
|||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||
many Unix-like systems.
|
||||
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||
|
||||
|
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
|
|||
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
||||
configure one library with UTF support and the other without in the same
|
||||
configuration.
|
||||
|
||||
|
||||
Even when --enable-unicode is included, the use of a UTF encoding still has
|
||||
to be enabled by an option at run time. When PCRE2 is compiled with this
|
||||
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
||||
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
||||
--enable-ebcdic at the same time.
|
||||
|
||||
|
||||
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
||||
includes support for the \P, \p, and \X sequences that recognize Unicode
|
||||
character properties. However, only the basic two-letter properties such as
|
||||
|
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
|
|||
cause programs to crash in strange ways. There is a discussion about stack
|
||||
sizes in the pcre2stack man page.
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||
|
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
|
|||
. src/pcre2.h the public PCRE2 header file
|
||||
. pcre2-config script that shows the building settings such as CFLAGS
|
||||
that were set for "configure"
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-8.pc )
|
||||
. libpcre2-16.pc ) data for the pkg-config command
|
||||
. libpcre2-32.pc )
|
||||
. libpcre2-posix.pc )
|
||||
|
@ -452,7 +452,7 @@ prints the version number, and
|
|||
|
||||
outputs information about where the 8-bit library is installed. This command
|
||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
from having to remember too many details. Run pcre2-config with no arguments to
|
||||
obtain a list of possible arguments.
|
||||
|
||||
The pkg-config command is another system for saving and retrieving information
|
||||
|
@ -593,7 +593,7 @@ bug in PCRE2.
|
|||
|
||||
The third set of tests checks pcre2_maketables(), the facility for building a
|
||||
set of character tables for a specific locale and using them instead of the
|
||||
default tables. The script uses the "locale" command to check for the
|
||||
default tables. The script uses the "locale" command to check for the
|
||||
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
||||
that it finds. If the "locale" command fails, or if its output doesn't include
|
||||
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
|||
matches any one of them.
|
||||
|
||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||
being compatible with the perltest.pl script, and the fifth checking
|
||||
being compatible with the perltest.pl script, and the fifth checking
|
||||
PCRE2-specific things.
|
||||
|
||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||
|
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
|
|||
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
||||
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
||||
generate different output in 8-bit mode. Each pair are for general cases and
|
||||
Unicode support, respectively. The thirteenth test checks the handling of
|
||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||
Unicode support, respectively. The thirteenth test checks the handling of
|
||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||
modes.
|
||||
|
||||
The fourteenth test is run only when JIT support is not available, and the
|
||||
|
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
|
|||
JIT-specific features such as information output from pcre2test about JIT
|
||||
compilation.
|
||||
|
||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||
respectively.
|
||||
|
||||
|
||||
|
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
|
|||
File manifest
|
||||
-------------
|
||||
|
||||
The distribution should contain the files listed below.
|
||||
The distribution should contain the files listed below.
|
||||
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||
the src directory:
|
||||
|
||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||
|
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
|
|||
specified, used by copying to pcre2_chartables.c
|
||||
|
||||
src/pcre2posix.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_auto_possess.c )
|
||||
src/pcre2_compile.c )
|
||||
src/pcre2_config.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_context.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_exec.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||
src/pcre2_maketables.c )
|
||||
src/pcre2_match.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_match_data.c )
|
||||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_substring.c )
|
||||
src/pcre2_tables.c )
|
||||
src/pcre2_ucd.c )
|
||||
src/pcre2_valid_utf.c )
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
<html>
|
||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||
the PCRE2 documentation. When the HTML documents are built from the man
|
||||
page versions, the entire doc/html directory is emptied, this file is then
|
||||
copied into doc/html/index.html, and the remaining files therein are
|
||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||
the PCRE2 documentation. When the HTML documents are built from the man
|
||||
page versions, the entire doc/html directory is emptied, this file is then
|
||||
copied into doc/html/index.html, and the remaining files therein are
|
||||
created by the 132html script.
|
||||
-->
|
||||
-->
|
||||
<head>
|
||||
<title>PCRE2 specification</title>
|
||||
</head>
|
||||
|
@ -87,7 +87,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
|
|||
functions.
|
||||
</p>
|
||||
|
||||
<table>
|
||||
<table>
|
||||
|
||||
<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
|
||||
<td> Assign stack for JIT matching</td></tr>
|
||||
|
@ -153,7 +153,7 @@ functions.
|
|||
|
||||
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
|
||||
<td> Build character tables in current locale</td></tr>
|
||||
|
||||
|
||||
<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
|
||||
<td> Convert compiled pattern to host byte order if necessary</td></tr>
|
||||
|
||||
|
|
|
@ -43,11 +43,11 @@ of Unicode in use can be discovered by running
|
|||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
The three libraries contain identical sets of functions, with names ending in
|
||||
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
|
||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||
The three libraries contain identical sets of functions, with names ending in
|
||||
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
|
||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||
one code unit width can be written using generic names such as
|
||||
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
|
||||
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
|
||||
the case.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -306,7 +306,7 @@ unknown should also use the real function names. (Unfortunately, it is not
|
|||
possible in C code to save and restore the value of a macro.)
|
||||
</P>
|
||||
<P>
|
||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
|
||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
|
||||
compiler error occurs.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -443,7 +443,7 @@ below.
|
|||
</P>
|
||||
<P>
|
||||
The choice of newline convention does not affect the interpretation of
|
||||
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
||||
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
||||
its own separate control.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
|
||||
|
@ -553,7 +553,7 @@ The memory used for a general context should be freed by calling:
|
|||
The compile context
|
||||
</b><br>
|
||||
<P>
|
||||
A compile context is required if you want to change the default values of any
|
||||
A compile context is required if you want to change the default values of any
|
||||
of the following compile-time parameters:
|
||||
<pre>
|
||||
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
||||
|
@ -562,7 +562,7 @@ of the following compile-time parameters:
|
|||
The compile time nested parentheses limit;
|
||||
An external function for stack checking.
|
||||
</pre>
|
||||
A compile context is also required if you are using custom memory management.
|
||||
A compile context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
<i>pcre2_compile()</i>.
|
||||
</P>
|
||||
|
@ -579,33 +579,33 @@ A compile context is created, copied, and freed by the following functions:
|
|||
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
A compile context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
A compile context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
|
||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
|
||||
ending sequence. The value of this parameter does not affect what is compiled;
|
||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
|
||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
|
||||
ending sequence. The value of this parameter does not affect what is compiled;
|
||||
it is just saved with the compiled pattern. The value is used by the JIT
|
||||
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
|
||||
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
|
||||
<i>pcre2_dfa_match()</i>.
|
||||
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||
argument is a general context. This function builds a set of character tables
|
||||
in the current locale.
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This specifies which characters or character sequences are to be recognized as
|
||||
This specifies which characters or character sequences are to be recognized as
|
||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||
</P>
|
||||
|
@ -627,7 +627,7 @@ using up too much system stack when being compiled.
|
|||
<br>
|
||||
<br>
|
||||
There is at least one application that runs PCRE2 in threads with very limited
|
||||
system stack, where running out of stack is to be avoided at all costs. The
|
||||
system stack, where running out of stack is to be avoided at all costs. The
|
||||
parenthesis limit above cannot take account of how much stack is actually
|
||||
available. For a finer control, you can supply a function that is called
|
||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
||||
|
@ -638,20 +638,20 @@ function should return zero if all is well, or non-zero to force an error.
|
|||
The match context
|
||||
</b><br>
|
||||
<P>
|
||||
A match context is required if you want to change the default values of any
|
||||
A match context is required if you want to change the default values of any
|
||||
of the following match-time parameters:
|
||||
<pre>
|
||||
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
||||
A callout function;
|
||||
The limit for calling <i>match()</i>;
|
||||
The limit for calling <i>match()</i>;
|
||||
The limit for calling <i>match()</i> recursively;
|
||||
The newline character sequence;
|
||||
</pre>
|
||||
A match context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
A match context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
|
||||
Changing the newline value or what \R matches at match time disables the use
|
||||
of JIT via <b>pcre2_match()</b>.
|
||||
Changing the newline value or what \R matches at match time disables the use
|
||||
of JIT via <b>pcre2_match()</b>.
|
||||
</P>
|
||||
<P>
|
||||
A match context is created, copied, and freed by the following functions:
|
||||
|
@ -666,8 +666,8 @@ A match context is created, copied, and freed by the following functions:
|
|||
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
A match context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
A match context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
||||
|
@ -693,7 +693,7 @@ calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
|
|||
imposed on the number of times this function is called during a match, which
|
||||
has the effect of limiting the amount of backtracking that can take place. For
|
||||
patterns that are not anchored, the count restarts from zero for each position
|
||||
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
|
||||
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
|
||||
which ignores it.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -730,7 +730,7 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
|
|||
Limiting the recursion depth limits the amount of system stack that can be
|
||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||
and is ignored, when matching is done using JIT compiled code or by the
|
||||
and is ignored, when matching is done using JIT compiled code or by the
|
||||
<b>pcre2_dfa_match()</b> function.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -751,9 +751,9 @@ limit is set, less than the default.
|
|||
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This function sets up two additional custom memory management functions for use
|
||||
This function sets up two additional custom memory management functions for use
|
||||
by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
|
||||
backtracking data, instead of recursive function calls that use the system
|
||||
backtracking data, instead of recursive function calls that use the system
|
||||
stack. There is a discussion about PCRE2's stack usage in the
|
||||
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
||||
documentation. See the
|
||||
|
@ -765,7 +765,7 @@ limited stacks. Because of the greater use of memory management,
|
|||
general custom memory functions are provided so that special-purpose external
|
||||
code can be used for this case, because the memory blocks are all the same
|
||||
size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
|
||||
so that they can be re-used when possible during the match. In the absence of
|
||||
so that they can be re-used when possible during the match. In the absence of
|
||||
these functions, the normal custom memory management functions are used, if
|
||||
supplied, otherwise the system functions.
|
||||
</P>
|
||||
|
@ -785,7 +785,7 @@ required. The second argument is a pointer to memory into which the information
|
|||
is placed. If NULL is passed, the function returns the amount of memory that is
|
||||
needed for the requested information. For calls that return numerical values,
|
||||
the value is in bytes; when requesting these values, <i>where</i> should point
|
||||
to appropriately aligned memory. For calls that return strings, the required
|
||||
to appropriately aligned memory. For calls that return strings, the required
|
||||
length is given in code units, not counting the terminating zero.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -809,7 +809,7 @@ compiling is available; otherwise it is set to zero.
|
|||
PCRE2_CONFIG_JITTARGET
|
||||
</pre>
|
||||
The <i>where</i> argument should point to a buffer that is at least 48 code
|
||||
units long. (The exact length needed can be found by calling
|
||||
units long. (The exact length needed can be found by calling
|
||||
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with a
|
||||
string that contains the name of the architecture for which the JIT compiler is
|
||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
||||
|
@ -820,9 +820,9 @@ the string, in code units, is returned.
|
|||
</pre>
|
||||
The output is an integer that contains the number of bytes used for internal
|
||||
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
|
||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
|
||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||
internal linkages always use 4 bytes, so the configured value is not relevant.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -908,16 +908,16 @@ units) is returned.
|
|||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
This function compiles a pattern, defined by a pointer to a string of code
|
||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||
pointer to a block of memory that contains the compiled pattern and related
|
||||
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
|
||||
This function compiles a pattern, defined by a pointer to a string of code
|
||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||
pointer to a block of memory that contains the compiled pattern and related
|
||||
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
|
||||
it is no longer needed.
|
||||
</P>
|
||||
<P>
|
||||
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
|
||||
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
|
||||
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
|
||||
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
|
||||
function that was used for the compile context.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -927,7 +927,7 @@ options are described below. Some of them (in particular, those that are
|
|||
compatible with Perl, but some others as well) can also be set and unset from
|
||||
within the pattern (see the detailed description in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation).
|
||||
documentation).
|
||||
</P>
|
||||
<P>
|
||||
For those options that can be different in different parts of the pattern, the
|
||||
|
@ -936,7 +936,7 @@ compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
|||
the time of matching as well as at compile time.
|
||||
</P>
|
||||
<P>
|
||||
Other, less frequently required compile-time parameters (for example, the
|
||||
Other, less frequently required compile-time parameters (for example, the
|
||||
newline setting) can be provided in a compile context (as described
|
||||
<a href="#compilecontext">above).</a>
|
||||
</P>
|
||||
|
@ -962,10 +962,10 @@ This code fragment shows a typical straightforward call to
|
|||
<pre>
|
||||
pcre2_code *re;
|
||||
PCRE2_SIZE erroffset;
|
||||
int errorcode;
|
||||
int errorcode;
|
||||
re = pcre2_compile(
|
||||
"^A.*Z", /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errorcode, /* for error code */
|
||||
&erroffset, /* for error offset */
|
||||
|
@ -984,14 +984,14 @@ Perl.
|
|||
<pre>
|
||||
PCRE2_ALLOW_EMPTY_CLASS
|
||||
</pre>
|
||||
By default, for compatibility with Perl, a closing square bracket that
|
||||
immediately follows an opening one is treated as a data character for the
|
||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||
therefore contains no characters and so can never match.
|
||||
By default, for compatibility with Perl, a closing square bracket that
|
||||
immediately follows an opening one is treated as a data character for the
|
||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||
therefore contains no characters and so can never match.
|
||||
<pre>
|
||||
PCRE2_ALT_BSUX
|
||||
</pre>
|
||||
This option requests alternative handling of three escape sequences, which
|
||||
This option requests alternative handling of three escape sequences, which
|
||||
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1023,7 +1023,7 @@ documentation.
|
|||
</pre>
|
||||
If this bit is set, letters in the pattern match both upper and lower case
|
||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||
changed within a pattern by a (?i) option setting.
|
||||
changed within a pattern by a (?i) option setting.
|
||||
<pre>
|
||||
PCRE2_DOLLAR_ENDONLY
|
||||
</pre>
|
||||
|
@ -1076,7 +1076,7 @@ Which characters are interpreted as newlines can be specified by a setting in
|
|||
the compile context that is passed to <b>pcre2_compile()</b> or by a special
|
||||
sequence at the start of the pattern, as described in the section entitled
|
||||
<a href="pcre2pattern.html#newlines">"Newline conventions"</a>
|
||||
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
|
||||
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
|
||||
built.
|
||||
<pre>
|
||||
PCRE2_FIRSTLINE
|
||||
|
@ -1091,7 +1091,7 @@ If this option is set, a back reference to an unset subpattern group matches an
|
|||
empty string (by default this causes the current matching alternative to fail).
|
||||
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
|
||||
find an "a" in the subject), whereas it fails by default, for Perl
|
||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||
JavaScript).
|
||||
<pre>
|
||||
PCRE2_MULTILINE
|
||||
|
@ -1116,10 +1116,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
|
|||
PCRE2_NEVER_UCP
|
||||
</pre>
|
||||
This option locks out the use of Unicode properties for handling \B, \b, \D,
|
||||
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
|
||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||
may be useful in applications that process patterns from external sources. The
|
||||
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
|
||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||
may be useful in applications that process patterns from external sources. The
|
||||
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
||||
<pre>
|
||||
PCRE2_NEVER_UTF
|
||||
|
@ -1195,7 +1195,7 @@ pattern
|
|||
(*MARK:A)(X|Y)
|
||||
</pre>
|
||||
The minimum length for a match is one character. If the subject is "ABC", there
|
||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||
string at the end of the subject does not take place, because PCRE2 knows that
|
||||
the subject is now too short, and so the (*MARK) is never encountered. In this
|
||||
case, the optimization does not affect the overall match result, which is still
|
||||
|
@ -1211,7 +1211,7 @@ and
|
|||
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
||||
in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
document.
|
||||
document.
|
||||
If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
|
||||
error code.
|
||||
</P>
|
||||
|
@ -1391,9 +1391,9 @@ The possible values for the second argument are defined in <b>pcre2.h</b>, and
|
|||
are as follows:
|
||||
<pre>
|
||||
PCRE2_INFO_ALLOPTIONS
|
||||
PCRE2_INFO_ARGOPTIONS
|
||||
PCRE2_INFO_ARGOPTIONS
|
||||
</pre>
|
||||
Return a copy of the pattern's options. The third argument should point to a
|
||||
Return a copy of the pattern's options. The third argument should point to a
|
||||
<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
||||
were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
|
||||
the compile options as modified by any top-level option settings at the start
|
||||
|
@ -1411,7 +1411,7 @@ alternatives begin with one of the following:
|
|||
\G always
|
||||
.* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
|
||||
</pre>
|
||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||
PCRE2_INFO_ALLOPTIONS.
|
||||
<pre>
|
||||
PCRE2_INFO_BACKREFMAX
|
||||
|
@ -1499,7 +1499,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
|
|||
</pre>
|
||||
Returns 1 if there is a rightmost literal code unit that must exist in any
|
||||
matched string, other than at its start. The third argument should point to an
|
||||
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
|
||||
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
|
||||
returned, the code unit value itself can be retrieved using
|
||||
PCRE2_INFO_LASTCODEUNIT.
|
||||
</P>
|
||||
|
@ -1657,11 +1657,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
|||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
data block, which is an opaque structure that is accessed by function calls. In
|
||||
particular, the match data block contains a vector of offsets into the subject
|
||||
string that define the matched part of the subject and any substrings that were
|
||||
captured. This is known as the <i>ovector</i>.
|
||||
captured. This is known as the <i>ovector</i>.
|
||||
</P>
|
||||
<P>
|
||||
Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
|
||||
|
@ -1676,12 +1676,12 @@ return the overall matched string.
|
|||
</P>
|
||||
<P>
|
||||
For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
|
||||
pointer to a compiled pattern. In this case the ovector is created to be
|
||||
pointer to a compiled pattern. In this case the ovector is created to be
|
||||
exactly the right size to hold all the substrings a pattern might capture.
|
||||
</P>
|
||||
<P>
|
||||
The second argument of both these functions is a pointer to a general context,
|
||||
which can specify custom memory management for obtaining the memory for the
|
||||
The second argument of both these functions is a pointer to a general context,
|
||||
which can specify custom memory management for obtaining the memory for the
|
||||
match data block. If you are not using custom memory management, pass NULL.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1728,8 +1728,8 @@ Here is an example of a simple call to <b>pcre2_match()</b>:
|
|||
match_data, /* the match data block */
|
||||
NULL); /* a match context; NULL means use defaults */
|
||||
</pre>
|
||||
If the subject string is zero-terminated, the length can be given as
|
||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||
If the subject string is zero-terminated, the length can be given as
|
||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||
matching parameters are to be changed. For details, see the section on
|
||||
<a href="#matchcontext">the match context</a>
|
||||
above.
|
||||
|
@ -1742,7 +1742,7 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
|
|||
<i>subject</i>, a length in <i>length</i>, and a starting offset in
|
||||
<i>startoffset</i>. The length and offset are in code units, not characters.
|
||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
UTF processing is enabled.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1752,7 +1752,7 @@ zero, the search for a match starts at the beginning of the subject, and this
|
|||
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
||||
must point to the start of a character, or to the end of the subject (in UTF-32
|
||||
mode, one code unit equals one character, so all offsets are valid). Like the
|
||||
pattern string, the subject may contain binary zeroes.
|
||||
pattern string, the subject may contain binary zeroes.
|
||||
</P>
|
||||
<P>
|
||||
A non-zero starting offset is useful when searching for another match in the
|
||||
|
@ -1814,7 +1814,7 @@ JIT matching is disabled and the normal interpretive code in
|
|||
The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
|
||||
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
||||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||
matching time. Note that setting the option at match time disables JIT
|
||||
matching time. Note that setting the option at match time disables JIT
|
||||
matching.
|
||||
<pre>
|
||||
PCRE2_NOTBOL
|
||||
|
@ -1867,14 +1867,14 @@ and
|
|||
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
||||
in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
page.
|
||||
</P>
|
||||
<P>
|
||||
If you know that your subject is valid, and you want to skip these checks for
|
||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
||||
<b>pcre2_match()</b>. You might want to do this for the second and subsequent
|
||||
calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
|
||||
matches in a single subject string.
|
||||
matches in a single subject string.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
||||
|
@ -1908,9 +1908,9 @@ documentation.
|
|||
</P>
|
||||
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||
<P>
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
either a
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
either a
|
||||
<a href="#compilecontext">compile context</a>
|
||||
or a
|
||||
<a href="#matchcontext">match context.</a>
|
||||
|
@ -1953,7 +1953,7 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
|||
</P>
|
||||
<P>
|
||||
In general, a pattern matches a certain portion of the subject, and in
|
||||
addition, further substrings from the subject may be picked out by
|
||||
addition, further substrings from the subject may be picked out by
|
||||
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
||||
book, this is called "capturing" in what follows, and the phrase "capturing
|
||||
subpattern" is used for a fragment of a pattern that picks out a substring.
|
||||
|
@ -1964,11 +1964,11 @@ pattern.
|
|||
</P>
|
||||
<P>
|
||||
The overall matched string and any captured substrings are returned to the
|
||||
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
|
||||
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
|
||||
contained within the
|
||||
<a href="#matchdatablock">match data block.</a>
|
||||
You can obtain direct access to the ovector by calling
|
||||
<b>pcre2_get_ovector_pointer()</b> to find its address, and
|
||||
You can obtain direct access to the ovector by calling
|
||||
<b>pcre2_get_ovector_pointer()</b> to find its address, and
|
||||
<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
|
||||
contains. Alternatively, you can use the auxiliary functions for accessing
|
||||
captured substrings
|
||||
|
@ -2044,26 +2044,26 @@ Other information about the match
|
|||
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
In addition to the offsets in the ovector, other information about a match is
|
||||
In addition to the offsets in the ovector, other information about a match is
|
||||
retained in the match data block and can be retrieved by the above functions.
|
||||
</P>
|
||||
<P>
|
||||
When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
|
||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||
match or a partial match, as well as after a successful one.
|
||||
</P>
|
||||
<P>
|
||||
The offset of the character at which the successful match started is
|
||||
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
|
||||
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
||||
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
||||
however, the \K has no effect for a partial match.
|
||||
<a name="errorlist"></a></P>
|
||||
<br><b>
|
||||
Error return values from <b>pcre2_match()</b>
|
||||
</b><br>
|
||||
<P>
|
||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
||||
error codes are also returned by other functions, and are documented with them.
|
||||
The codes are given names in the header file. If UTF checking is in force and
|
||||
|
@ -2205,7 +2205,7 @@ argument is a pointer to the match data block, the second is the group number,
|
|||
and the third is a pointer to a variable into which the length is placed.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
||||
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
||||
new memory, obtained using the same memory allocation function that was used
|
||||
for the match data block. The first two arguments of these functions are a
|
||||
|
@ -2220,10 +2220,10 @@ This is updated to contain the actual number of code units used, excluding the
|
|||
terminating zero.
|
||||
</P>
|
||||
<P>
|
||||
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
|
||||
to variables that are updated with a pointer to the new memory and the number
|
||||
of code units that comprise the substring, again excluding the terminating
|
||||
zero. When the substring is no longer needed, the memory should be freed by
|
||||
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
|
||||
to variables that are updated with a pointer to the new memory and the number
|
||||
of code units that comprise the substring, again excluding the terminating
|
||||
zero. When the substring is no longer needed, the memory should be freed by
|
||||
calling <b>pcre2_substring_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2237,9 +2237,9 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
|
|||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
</pre>
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
small to capture that group.
|
||||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
|
@ -2253,7 +2253,7 @@ small to capture that group.
|
|||
<P>
|
||||
The <b>pcre2_substring_list_get()</b> function extracts all available substrings
|
||||
and builds a list of pointers to them, and a second list that contains their
|
||||
lengths (in code units), excluding a terminating zero that is added to each of
|
||||
lengths (in code units), excluding a terminating zero that is added to each of
|
||||
them. All this is done in a single block of memory that is obtained using the
|
||||
same memory allocation function that was used to get the match data block.
|
||||
</P>
|
||||
|
@ -2265,7 +2265,7 @@ NULL pointer. The address of the list of lengths is returned via
|
|||
therefore need the lengths, you may supply NULL as the <b>lengthsptr</b>
|
||||
argument to disable the creation of a list of lengths. The yield of the
|
||||
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
||||
could not be obtained. When the list is no longer needed, it should be freed by
|
||||
could not be obtained. When the list is no longer needed, it should be freed by
|
||||
calling <b>pcre2_substring_list_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2312,7 +2312,7 @@ name.
|
|||
<P>
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
functions described in the previous section. For convenience, there are also
|
||||
"byname" functions that correspond to the "bynumber" functions, the only
|
||||
"byname" functions that correspond to the "bynumber" functions, the only
|
||||
difference being that the second argument is a name instead of a number.
|
||||
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
||||
the behaviour may not be what you want (see the next section).
|
||||
|
@ -2375,7 +2375,7 @@ numbers, and hence the captured data.
|
|||
<P>
|
||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||
when it finds the first match, starting at a given point in the subject. If you
|
||||
want to find all possible matches, or the longest possible match at a given
|
||||
want to find all possible matches, or the longest possible match at a given
|
||||
position, consider using the alternative matching function (see below) instead.
|
||||
If you cannot use the alternative function, you can kludge it up by making use
|
||||
of the callout facility, which is described in the
|
||||
|
@ -2566,8 +2566,8 @@ fail, this error is given.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
|
||||
|
|
|
@ -88,11 +88,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
|
|||
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
|
||||
strings that are contained in vectors of 16-bit and 32-bit code units,
|
||||
respectively. These can be interpreted either as single-unit characters or
|
||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||
the following to the <b>configure</b> command:
|
||||
<pre>
|
||||
--enable-pcre16
|
||||
--enable-pcre32
|
||||
--enable-pcre32
|
||||
</pre>
|
||||
If you do not want the 8-bit library, add
|
||||
<pre>
|
||||
|
@ -358,7 +358,7 @@ override this value by specifying a run-time option.
|
|||
If you add one of
|
||||
<pre>
|
||||
--enable-pcre2test-libreadline
|
||||
--enable-pcre2test-libedit
|
||||
--enable-pcre2test-libedit
|
||||
</pre>
|
||||
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
|
||||
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
|
||||
|
@ -376,8 +376,8 @@ unmodified distribution version of readline is in use), some extra
|
|||
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
|
||||
this:
|
||||
<pre>
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
which link with readline the to choose an appropriate library."
|
||||
</pre>
|
||||
If your environment has not been set up so that an appropriate library is
|
||||
|
|
|
@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
|||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
|
@ -56,8 +56,8 @@ the following line. */
|
|||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
such as pcre2_compile(). */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
|
|||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
|
@ -151,9 +151,9 @@ re = pcre2_compile(
|
|||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
@ -197,7 +197,7 @@ if (rc < 0)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
|
@ -210,7 +210,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
|||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
|
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
|||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
|
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
|||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
@ -324,7 +324,7 @@ sequence. */
|
|||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
|
|
|
@ -71,10 +71,10 @@ performance, there is also a "fast path" API that is JIT-specific.
|
|||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
|
||||
<P>
|
||||
To make use of the JIT support in the simplest way, all you have to do is to
|
||||
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
|
||||
<b>pcre2_compile()</b>. This function has two arguments: the first is the
|
||||
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
|
||||
To make use of the JIT support in the simplest way, all you have to do is to
|
||||
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
|
||||
<b>pcre2_compile()</b>. This function has two arguments: the first is the
|
||||
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
|
||||
second is a set of option bits, which must include at least one of
|
||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||
</P>
|
||||
|
@ -239,7 +239,7 @@ non-default JIT stacks might operate:
|
|||
</pre>
|
||||
All the functions described in this section do nothing if JIT is not available,
|
||||
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
|
||||
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
|
||||
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
|
||||
processed by <b>pcre2_jit_compile()</b>.
|
||||
<a name="stackfaq"></a></P>
|
||||
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
||||
|
@ -328,18 +328,18 @@ callback.
|
|||
<pre>
|
||||
int rc;
|
||||
pcre2_code *re;
|
||||
pcre2_match_data *match_data;
|
||||
pcre2_match_data *match_data;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
|
||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||
&errornumber, &erroffset, NULL);
|
||||
/* Check for errors */
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
/* Check for errors */
|
||||
/* Check for errors */
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||
/* Check for error (NULL) */
|
||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
||||
/* Check results */
|
||||
pcre2_free(re);
|
||||
|
|
|
@ -89,15 +89,15 @@ empty string at the end of the subject.
|
|||
</P>
|
||||
<P>
|
||||
When a partial match is returned, the first two elements in the ovector point
|
||||
to the portion of the subject that was matched. The appearance of \K in the
|
||||
to the portion of the subject that was matched. The appearance of \K in the
|
||||
pattern has no effect for a partial match. Consider this pattern:
|
||||
<pre>
|
||||
/abc\K123/
|
||||
</pre>
|
||||
If it is matched against "456abc123xyz" the result is a complete match, and the
|
||||
ovector defines the matched string as "123", because \K resets the "start of
|
||||
match" point. However, if a partial match is requested and the subject string
|
||||
is "456abc12", a partial match is found for the string "abc12", because all
|
||||
ovector defines the matched string as "123", because \K resets the "start of
|
||||
match" point. However, if a partial match is requested and the subject string
|
||||
is "456abc12", a partial match is found for the string "abc12", because all
|
||||
these characters are needed for a subsequent re-match with additional
|
||||
characters.
|
||||
</P>
|
||||
|
@ -343,14 +343,14 @@ same point as before.
|
|||
For example, if the pattern "(?<=123)abc" is partially matched against the
|
||||
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
||||
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
||||
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
|
||||
displays a partial match, it indicates the lookbehind characters with '<'
|
||||
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
|
||||
displays a partial match, it indicates the lookbehind characters with '<'
|
||||
characters:
|
||||
<pre>
|
||||
re> "(?<=123)abc"
|
||||
data> xx123ab\=ph
|
||||
Partial match: 123ab
|
||||
<<<
|
||||
<<<
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -145,7 +145,7 @@ Unicode newline sequence. The
|
|||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page has
|
||||
<a href="pcre2api.html#newlines">further discussion</a>
|
||||
about newlines, and shows how to set the newline convention when calling
|
||||
about newlines, and shows how to set the newline convention when calling
|
||||
<b>pcre2_compile()</b>.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -218,7 +218,7 @@ corresponding characters in the subject. As a trivial example, the pattern
|
|||
</pre>
|
||||
matches a portion of a subject string that is identical to itself. When
|
||||
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
||||
independently of case.
|
||||
independently of case.
|
||||
</P>
|
||||
<P>
|
||||
The power of regular expressions comes from the ability to include alternatives
|
||||
|
@ -1191,8 +1191,8 @@ An opening square bracket introduces a character class, terminated by a closing
|
|||
square bracket. A closing square bracket on its own is not special by default.
|
||||
If a closing square bracket is required as a member of the class, it should be
|
||||
the first data character in the class (after an initial circumflex, if present)
|
||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||
square bracket at the start does end the (empty) class.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1216,7 +1216,7 @@ string.
|
|||
When caseless matching is set, any letters in a class represent both their
|
||||
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
||||
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
||||
caseful version would.
|
||||
caseful version would.
|
||||
</P>
|
||||
<P>
|
||||
Characters that might indicate line breaks are never treated in any special way
|
||||
|
@ -1341,7 +1341,7 @@ classes by other sequences, as follows:
|
|||
[:alnum:] becomes \p{Xan}
|
||||
[:alpha:] becomes \p{L}
|
||||
[:blank:] becomes \h
|
||||
[:cntrl:] becomes \p{Cc}
|
||||
[:cntrl:] becomes \p{Cc}
|
||||
[:digit:] becomes \p{Nd}
|
||||
[:lower:] becomes \p{Ll}
|
||||
[:space:] becomes \p{Xps}
|
||||
|
@ -1490,7 +1490,7 @@ match "cataract", "erpillar" or an empty string.
|
|||
<br>
|
||||
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
||||
the whole pattern matches, the portion of the subject string that matched the
|
||||
subpattern is passed back to the caller, separately from the portion that
|
||||
subpattern is passed back to the caller, separately from the portion that
|
||||
matched the whole pattern. (This applies only to the traditional matching
|
||||
function; the DFA matching function does not support capturing.)
|
||||
</P>
|
||||
|
@ -1908,7 +1908,7 @@ at release 5.10.
|
|||
PCRE2 has an optimization that automatically "possessifies" certain simple
|
||||
pattern constructs. For example, the sequence A+B is treated as A++B because
|
||||
there is no point in backtracking into a sequence of A's when B must follow.
|
||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||
the pattern with (*NO_AUTO_POSSESS).
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2216,7 +2216,7 @@ if the pattern is written as
|
|||
<pre>
|
||||
^.*+(?<=abcd)
|
||||
</pre>
|
||||
there can be no backtracking for the .*+ item because of the possessive
|
||||
there can be no backtracking for the .*+ item because of the possessive
|
||||
quantifier; it can match only the entire string. The subsequent lookbehind
|
||||
assertion does a single test on the last four characters. If it fails, the
|
||||
match fails immediately. For long strings, this approach makes a significant
|
||||
|
@ -2720,8 +2720,8 @@ same pair of parentheses when there is a repetition.
|
|||
<P>
|
||||
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
||||
code. The feature is called "callout". The caller of PCRE2 provides an external
|
||||
function by putting its entry point in a match context using the function
|
||||
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
|
||||
function by putting its entry point in a match context using the function
|
||||
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
|
||||
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
|
||||
point is set to NULL, callouts are disabled.
|
||||
</P>
|
||||
|
@ -2961,7 +2961,7 @@ output from <b>pcre2test</b>:
|
|||
re> /(*COMMIT)abc/
|
||||
data> xyzabc
|
||||
0: abc
|
||||
data>
|
||||
data>
|
||||
re> /(*COMMIT)abc/no_start_optimize
|
||||
data> xyzabc
|
||||
No match
|
||||
|
@ -2989,7 +2989,7 @@ as (*COMMIT).
|
|||
<P>
|
||||
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
ignoring those set by (*PRUNE) or (*THEN).
|
||||
<pre>
|
||||
(*SKIP)
|
||||
|
@ -3041,7 +3041,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
|
|||
<P>
|
||||
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
ignoring those set by (*PRUNE) and (*THEN).
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -103,17 +103,17 @@ PCRE2 to use heap memory instead of stack for remembering back-up points when
|
|||
of how to do this are given in the
|
||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||
documentation. When built in this way, instead of using the stack, PCRE2
|
||||
gets memory for remembering backup points from the heap. By default, the memory
|
||||
is obtained by calling the system <b>malloc()</b> function, but you can arrange
|
||||
to supply your own memory management function. For details, see the section
|
||||
entitled
|
||||
gets memory for remembering backup points from the heap. By default, the memory
|
||||
is obtained by calling the system <b>malloc()</b> function, but you can arrange
|
||||
to supply your own memory management function. For details, see the section
|
||||
entitled
|
||||
<a href="pcre2api.html#matchcontext">"The match context"</a>
|
||||
in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation. Since the block sizes are always the same, it may be possible to
|
||||
implement a customized memory handler that is more efficient than the standard
|
||||
function. The memory blocks obtained for this purpose are retained and re-used
|
||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||
function. The memory blocks obtained for this purpose are retained and re-used
|
||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||
before it exits.
|
||||
</P>
|
||||
<br><b>
|
||||
|
|
|
@ -414,7 +414,7 @@ appear.
|
|||
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
|
|
|
@ -476,7 +476,7 @@ about the pattern:
|
|||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
jitverify verify JIT use
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
|
@ -565,7 +565,7 @@ number in the range 0 to 7:
|
|||
7 all three modes
|
||||
</pre>
|
||||
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
||||
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
|
||||
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
|
||||
for the appropriate type of match, except when incompatible run-time options
|
||||
are specified. For more details, see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
|
@ -710,7 +710,7 @@ for a description of their effects.
|
|||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||
</pre>
|
||||
The partial matching modifiers are provided with abbreviations because they
|
||||
The partial matching modifiers are provided with abbreviations because they
|
||||
appear frequently in tests.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -892,8 +892,8 @@ until it finds the minimum values for each parameter that allow
|
|||
<b>pcre2_match()</b> to complete without error.
|
||||
</P>
|
||||
<P>
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||
warning message).
|
||||
</P>
|
||||
<P>
|
||||
|
@ -939,10 +939,10 @@ appears, though of course it can also be used to set a default in a
|
|||
available for storing matching information. The default is 15.
|
||||
</P>
|
||||
<P>
|
||||
At least one pair of offsets is always created by
|
||||
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
||||
At least one pair of offsets is always created by
|
||||
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
||||
vector.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||
|
|
|
@ -67,7 +67,7 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a
|
|||
single code unit.
|
||||
</P>
|
||||
<P>
|
||||
The escape sequence \C can be used to match a single code unit, in a UTF mode,
|
||||
The escape sequence \C can be used to match a single code unit, in a UTF mode,
|
||||
but its use can lead to some strange effects because it breaks up multi-unit
|
||||
characters (see the description of \C in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
|
@ -114,8 +114,8 @@ VALIDITY OF UTF STRINGS
|
|||
</b><br>
|
||||
<P>
|
||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
</P>
|
||||
<P>
|
||||
UTF-16 and UTF-32 strings can indicate their endianness by special code known
|
||||
|
|
|
@ -23,11 +23,11 @@ of Unicode in use can be discovered by running
|
|||
.sp
|
||||
pcre2test -C
|
||||
.P
|
||||
The three libraries contain identical sets of functions, with names ending in
|
||||
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
|
||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||
The three libraries contain identical sets of functions, with names ending in
|
||||
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
|
||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||
one code unit width can be written using generic names such as
|
||||
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
|
||||
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
|
||||
the case.
|
||||
.P
|
||||
In addition to the Perl-compatible matching function, PCRE2 contains an
|
||||
|
|
|
@ -158,8 +158,8 @@ REVISION
|
|||
Last updated: 28 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -2529,8 +2529,8 @@ REVISION
|
|||
Last updated: 16 October 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -2981,8 +2981,8 @@ REVISION
|
|||
Last updated: 28 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -3217,8 +3217,8 @@ REVISION
|
|||
Last updated: 19 October 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -3403,8 +3403,8 @@ REVISION
|
|||
Last updated: 28 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -3758,8 +3758,8 @@ REVISION
|
|||
Last updated: 29 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -3826,8 +3826,8 @@ REVISION
|
|||
Last updated: 29 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -4045,8 +4045,8 @@ REVISION
|
|||
Last updated: 29 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -4485,8 +4485,8 @@ REVISION
|
|||
Last updated: 14 October 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -4711,5 +4711,5 @@ REVISION
|
|||
Last updated: 16 September 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
206
doc/pcre2api.3
206
doc/pcre2api.3
|
@ -250,7 +250,7 @@ to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is
|
|||
unknown should also use the real function names. (Unfortunately, it is not
|
||||
possible in C code to save and restore the value of a macro.)
|
||||
.P
|
||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
|
||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
|
||||
compiler error occurs.
|
||||
.P
|
||||
When using multiple libraries in an application, you must take care when
|
||||
|
@ -392,7 +392,7 @@ section on \fBpcre2_match()\fP options
|
|||
below.
|
||||
.P
|
||||
The choice of newline convention does not affect the interpretation of
|
||||
the \en or \er escape sequences, nor does it affect what \eR matches, which has
|
||||
the \en or \er escape sequences, nor does it affect what \eR matches, which has
|
||||
its own separate control.
|
||||
.
|
||||
.
|
||||
|
@ -509,7 +509,7 @@ The memory used for a general context should be freed by calling:
|
|||
.SS "The compile context"
|
||||
.rs
|
||||
.sp
|
||||
A compile context is required if you want to change the default values of any
|
||||
A compile context is required if you want to change the default values of any
|
||||
of the following compile-time parameters:
|
||||
.sp
|
||||
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
||||
|
@ -518,7 +518,7 @@ of the following compile-time parameters:
|
|||
The compile time nested parentheses limit;
|
||||
An external function for stack checking.
|
||||
.sp
|
||||
A compile context is also required if you are using custom memory management.
|
||||
A compile context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
\fIpcre2_compile()\fP.
|
||||
.P
|
||||
|
@ -534,8 +534,8 @@ A compile context is created, copied, and freed by the following functions:
|
|||
.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP);
|
||||
.fi
|
||||
.sp
|
||||
A compile context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
A compile context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||
.sp
|
||||
.nf
|
||||
|
@ -543,11 +543,11 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
|||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
|
||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
|
||||
ending sequence. The value of this parameter does not affect what is compiled;
|
||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
|
||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
|
||||
ending sequence. The value of this parameter does not affect what is compiled;
|
||||
it is just saved with the compiled pattern. The value is used by the JIT
|
||||
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
||||
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
||||
\fIpcre2_dfa_match()\fP.
|
||||
.sp
|
||||
.nf
|
||||
|
@ -555,7 +555,7 @@ compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
|||
.B " const unsigned char *\fItables\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
|
||||
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
|
||||
argument is a general context. This function builds a set of character tables
|
||||
in the current locale.
|
||||
.sp
|
||||
|
@ -564,9 +564,9 @@ in the current locale.
|
|||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This specifies which characters or character sequences are to be recognized as
|
||||
This specifies which characters or character sequences are to be recognized as
|
||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||
.P
|
||||
|
@ -591,7 +591,7 @@ using up too much system stack when being compiled.
|
|||
.fi
|
||||
.sp
|
||||
There is at least one application that runs PCRE2 in threads with very limited
|
||||
system stack, where running out of stack is to be avoided at all costs. The
|
||||
system stack, where running out of stack is to be avoided at all costs. The
|
||||
parenthesis limit above cannot take account of how much stack is actually
|
||||
available. For a finer control, you can supply a function that is called
|
||||
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
||||
|
@ -603,20 +603,20 @@ function should return zero if all is well, or non-zero to force an error.
|
|||
.SS "The match context"
|
||||
.rs
|
||||
.sp
|
||||
A match context is required if you want to change the default values of any
|
||||
A match context is required if you want to change the default values of any
|
||||
of the following match-time parameters:
|
||||
.sp
|
||||
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
||||
A callout function;
|
||||
The limit for calling \fImatch()\fP;
|
||||
The limit for calling \fImatch()\fP;
|
||||
The limit for calling \fImatch()\fP recursively;
|
||||
The newline character sequence;
|
||||
.sp
|
||||
A match context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
A match context is also required if you are using custom memory management.
|
||||
If none of these apply, just pass NULL as the context argument of
|
||||
\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP.
|
||||
Changing the newline value or what \eR matches at match time disables the use
|
||||
of JIT via \fBpcre2_match()\fP.
|
||||
Changing the newline value or what \eR matches at match time disables the use
|
||||
of JIT via \fBpcre2_match()\fP.
|
||||
.P
|
||||
A match context is created, copied, and freed by the following functions:
|
||||
.sp
|
||||
|
@ -630,8 +630,8 @@ A match context is created, copied, and freed by the following functions:
|
|||
.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP);
|
||||
.fi
|
||||
.sp
|
||||
A match context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
A match context is created with default values for its parameters. These can
|
||||
be changed by calling the following functions, which return 0 on success, or
|
||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||
.sp
|
||||
.nf
|
||||
|
@ -662,7 +662,7 @@ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
|
|||
imposed on the number of times this function is called during a match, which
|
||||
has the effect of limiting the amount of backtracking that can take place. For
|
||||
patterns that are not anchored, the count restarts from zero for each position
|
||||
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
|
||||
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
|
||||
which ignores it.
|
||||
.P
|
||||
When \fBpcre2_match()\fP is called with a pattern that was successfully studied
|
||||
|
@ -698,7 +698,7 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP.
|
|||
Limiting the recursion depth limits the amount of system stack that can be
|
||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||
and is ignored, when matching is done using JIT compiled code or by the
|
||||
and is ignored, when matching is done using JIT compiled code or by the
|
||||
\fBpcre2_dfa_match()\fP function.
|
||||
.P
|
||||
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
|
||||
|
@ -720,9 +720,9 @@ limit is set, less than the default.
|
|||
.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This function sets up two additional custom memory management functions for use
|
||||
This function sets up two additional custom memory management functions for use
|
||||
by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering
|
||||
backtracking data, instead of recursive function calls that use the system
|
||||
backtracking data, instead of recursive function calls that use the system
|
||||
stack. There is a discussion about PCRE2's stack usage in the
|
||||
.\" HREF
|
||||
\fBpcre2stack\fP
|
||||
|
@ -738,7 +738,7 @@ limited stacks. Because of the greater use of memory management,
|
|||
general custom memory functions are provided so that special-purpose external
|
||||
code can be used for this case, because the memory blocks are all the same
|
||||
size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit
|
||||
so that they can be re-used when possible during the match. In the absence of
|
||||
so that they can be re-used when possible during the match. In the absence of
|
||||
these functions, the normal custom memory management functions are used, if
|
||||
supplied, otherwise the system functions.
|
||||
.
|
||||
|
@ -760,7 +760,7 @@ required. The second argument is a pointer to memory into which the information
|
|||
is placed. If NULL is passed, the function returns the amount of memory that is
|
||||
needed for the requested information. For calls that return numerical values,
|
||||
the value is in bytes; when requesting these values, \fIwhere\fP should point
|
||||
to appropriately aligned memory. For calls that return strings, the required
|
||||
to appropriately aligned memory. For calls that return strings, the required
|
||||
length is given in code units, not counting the terminating zero.
|
||||
.P
|
||||
When requesting information, the returned value from \fBpcre2_config()\fP is
|
||||
|
@ -783,7 +783,7 @@ compiling is available; otherwise it is set to zero.
|
|||
PCRE2_CONFIG_JITTARGET
|
||||
.sp
|
||||
The \fIwhere\fP argument should point to a buffer that is at least 48 code
|
||||
units long. (The exact length needed can be found by calling
|
||||
units long. (The exact length needed can be found by calling
|
||||
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a
|
||||
string that contains the name of the architecture for which the JIT compiler is
|
||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
||||
|
@ -794,9 +794,9 @@ the string, in code units, is returned.
|
|||
.sp
|
||||
The output is an integer that contains the number of bytes used for internal
|
||||
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
|
||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
|
||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||
internal linkages always use 4 bytes, so the configured value is not relevant.
|
||||
.P
|
||||
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
|
||||
|
@ -820,7 +820,7 @@ that is recognized as meaning "newline". The values are:
|
|||
3 Carriage return, linefeed (CRLF)
|
||||
4 Any Unicode line ending
|
||||
5 Any of CR, LF, or CRLF
|
||||
.sp
|
||||
.sp
|
||||
The default should normally correspond to the standard sequence for your
|
||||
operating system.
|
||||
.sp
|
||||
|
@ -849,7 +849,7 @@ compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
|
|||
heap instead of recursive function calls.
|
||||
.sp
|
||||
PCRE2_CONFIG_UNICODE_VERSION
|
||||
.sp
|
||||
.sp
|
||||
The \fIwhere\fP argument should point to a buffer that is at least 24 code
|
||||
units long. (The exact length needed can be found by calling
|
||||
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled
|
||||
|
@ -884,15 +884,15 @@ units) is returned.
|
|||
.B pcre2_code_free(pcre2_code *\fIcode\fP);
|
||||
.fi
|
||||
.P
|
||||
This function compiles a pattern, defined by a pointer to a string of code
|
||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||
pointer to a block of memory that contains the compiled pattern and related
|
||||
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
|
||||
This function compiles a pattern, defined by a pointer to a string of code
|
||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||
pointer to a block of memory that contains the compiled pattern and related
|
||||
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
|
||||
it is no longer needed.
|
||||
.P
|
||||
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
|
||||
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
|
||||
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
|
||||
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
|
||||
function that was used for the compile context.
|
||||
.P
|
||||
The \fIoptions\fP argument contains various bit settings that affect the
|
||||
|
@ -903,14 +903,14 @@ within the pattern (see the detailed description in the
|
|||
.\" HREF
|
||||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
documentation).
|
||||
documentation).
|
||||
.P
|
||||
For those options that can be different in different parts of the pattern, the
|
||||
contents of the \fIoptions\fP argument specifies their settings at the start of
|
||||
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
||||
the time of matching as well as at compile time.
|
||||
.P
|
||||
Other, less frequently required compile-time parameters (for example, the
|
||||
Other, less frequently required compile-time parameters (for example, the
|
||||
newline setting) can be provided in a compile context (as described
|
||||
.\" HTML <a href="#compilecontext">
|
||||
.\" </a>
|
||||
|
@ -936,10 +936,10 @@ This code fragment shows a typical straightforward call to
|
|||
.sp
|
||||
pcre2_code *re;
|
||||
PCRE2_SIZE erroffset;
|
||||
int errorcode;
|
||||
int errorcode;
|
||||
re = pcre2_compile(
|
||||
"^A.*Z", /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errorcode, /* for error code */
|
||||
&erroffset, /* for error offset */
|
||||
|
@ -958,14 +958,14 @@ Perl.
|
|||
.sp
|
||||
PCRE2_ALLOW_EMPTY_CLASS
|
||||
.sp
|
||||
By default, for compatibility with Perl, a closing square bracket that
|
||||
immediately follows an opening one is treated as a data character for the
|
||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||
therefore contains no characters and so can never match.
|
||||
By default, for compatibility with Perl, a closing square bracket that
|
||||
immediately follows an opening one is treated as a data character for the
|
||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||
therefore contains no characters and so can never match.
|
||||
.sp
|
||||
PCRE2_ALT_BSUX
|
||||
.sp
|
||||
This option requests alternative handling of three escape sequences, which
|
||||
This option requests alternative handling of three escape sequences, which
|
||||
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
||||
.P
|
||||
(1) \eU matches an upper case "U" character; by default \eU causes a compile
|
||||
|
@ -996,7 +996,7 @@ documentation.
|
|||
.sp
|
||||
If this bit is set, letters in the pattern match both upper and lower case
|
||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||
changed within a pattern by a (?i) option setting.
|
||||
changed within a pattern by a (?i) option setting.
|
||||
.sp
|
||||
PCRE2_DOLLAR_ENDONLY
|
||||
.sp
|
||||
|
@ -1052,7 +1052,7 @@ sequence at the start of the pattern, as described in the section entitled
|
|||
.\" </a>
|
||||
"Newline conventions"
|
||||
.\"
|
||||
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
|
||||
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
|
||||
built.
|
||||
.sp
|
||||
PCRE2_FIRSTLINE
|
||||
|
@ -1067,7 +1067,7 @@ If this option is set, a back reference to an unset subpattern group matches an
|
|||
empty string (by default this causes the current matching alternative to fail).
|
||||
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
|
||||
find an "a" in the subject), whereas it fails by default, for Perl
|
||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||
JavaScript).
|
||||
.sp
|
||||
PCRE2_MULTILINE
|
||||
|
@ -1091,10 +1091,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
|
|||
PCRE2_NEVER_UCP
|
||||
.sp
|
||||
This option locks out the use of Unicode properties for handling \eB, \eb, \eD,
|
||||
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
|
||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||
may be useful in applications that process patterns from external sources. The
|
||||
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
|
||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||
may be useful in applications that process patterns from external sources. The
|
||||
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
||||
.sp
|
||||
PCRE2_NEVER_UTF
|
||||
|
@ -1167,7 +1167,7 @@ pattern
|
|||
(*MARK:A)(X|Y)
|
||||
.sp
|
||||
The minimum length for a match is one character. If the subject is "ABC", there
|
||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||
string at the end of the subject does not take place, because PCRE2 knows that
|
||||
the subject is now too short, and so the (*MARK) is never encountered. In this
|
||||
case, the optimization does not affect the overall match result, which is still
|
||||
|
@ -1194,7 +1194,7 @@ in the
|
|||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
document.
|
||||
document.
|
||||
If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative
|
||||
error code.
|
||||
.P
|
||||
|
@ -1385,9 +1385,9 @@ The possible values for the second argument are defined in \fBpcre2.h\fP, and
|
|||
are as follows:
|
||||
.sp
|
||||
PCRE2_INFO_ALLOPTIONS
|
||||
PCRE2_INFO_ARGOPTIONS
|
||||
PCRE2_INFO_ARGOPTIONS
|
||||
.sp
|
||||
Return a copy of the pattern's options. The third argument should point to a
|
||||
Return a copy of the pattern's options. The third argument should point to a
|
||||
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
||||
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
|
||||
the compile options as modified by any top-level option settings at the start
|
||||
|
@ -1406,7 +1406,7 @@ alternatives begin with one of the following:
|
|||
.* if PCRE2_DOTALL is set and there are no back
|
||||
references to the subpattern in which .* appears
|
||||
.sp
|
||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||
PCRE2_INFO_ALLOPTIONS.
|
||||
.sp
|
||||
PCRE2_INFO_BACKREFMAX
|
||||
|
@ -1490,7 +1490,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
|
|||
.sp
|
||||
Returns 1 if there is a rightmost literal code unit that must exist in any
|
||||
matched string, other than at its start. The third argument should point to an
|
||||
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
|
||||
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
|
||||
returned, the code unit value itself can be retrieved using
|
||||
PCRE2_INFO_LASTCODEUNIT.
|
||||
.P
|
||||
|
@ -1617,7 +1617,7 @@ values are:
|
|||
3 Carriage return, linefeed (CRLF)
|
||||
4 Any Unicode line ending
|
||||
5 Any of CR, LF, or CRLF
|
||||
.sp
|
||||
.sp
|
||||
The default can be overridden when a pattern is matched.
|
||||
.sp
|
||||
PCRE2_INFO_RECURSIONLIMIT
|
||||
|
@ -1652,11 +1652,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
|||
.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP);
|
||||
.fi
|
||||
.P
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
data block, which is an opaque structure that is accessed by function calls. In
|
||||
particular, the match data block contains a vector of offsets into the subject
|
||||
string that define the matched part of the subject and any substrings that were
|
||||
captured. This is known as the \fIovector\fP.
|
||||
captured. This is known as the \fIovector\fP.
|
||||
.P
|
||||
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
|
||||
match data block by calling one of the creation functions above. For
|
||||
|
@ -1669,11 +1669,11 @@ pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to
|
|||
return the overall matched string.
|
||||
.P
|
||||
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
|
||||
pointer to a compiled pattern. In this case the ovector is created to be
|
||||
pointer to a compiled pattern. In this case the ovector is created to be
|
||||
exactly the right size to hold all the substrings a pattern might capture.
|
||||
.P
|
||||
The second argument of both these functions is a pointer to a general context,
|
||||
which can specify custom memory management for obtaining the memory for the
|
||||
The second argument of both these functions is a pointer to a general context,
|
||||
which can specify custom memory management for obtaining the memory for the
|
||||
match data block. If you are not using custom memory management, pass NULL.
|
||||
.P
|
||||
A match data block can be used many times, with the same or different compiled
|
||||
|
@ -1729,8 +1729,8 @@ Here is an example of a simple call to \fBpcre2_match()\fP:
|
|||
match_data, /* the match data block */
|
||||
NULL); /* a match context; NULL means use defaults */
|
||||
.sp
|
||||
If the subject string is zero-terminated, the length can be given as
|
||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||
If the subject string is zero-terminated, the length can be given as
|
||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||
matching parameters are to be changed. For details, see the section on
|
||||
.\" HTML <a href="#matchcontext">
|
||||
.\" </a>
|
||||
|
@ -1746,7 +1746,7 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
|
|||
\fIsubject\fP, a length in \fIlength\fP, and a starting offset in
|
||||
\fIstartoffset\fP. The length and offset are in code units, not characters.
|
||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||
UTF processing is enabled.
|
||||
.P
|
||||
If \fIstartoffset\fP is greater than the length of the subject,
|
||||
|
@ -1755,7 +1755,7 @@ zero, the search for a match starts at the beginning of the subject, and this
|
|||
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
||||
must point to the start of a character, or to the end of the subject (in UTF-32
|
||||
mode, one code unit equals one character, so all offsets are valid). Like the
|
||||
pattern string, the subject may contain binary zeroes.
|
||||
pattern string, the subject may contain binary zeroes.
|
||||
.P
|
||||
A non-zero starting offset is useful when searching for another match in the
|
||||
same subject by calling \fBpcre2_match()\fP again after a previous success.
|
||||
|
@ -1816,7 +1816,7 @@ JIT matching is disabled and the normal interpretive code in
|
|||
The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first
|
||||
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
||||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||
matching time. Note that setting the option at match time disables JIT
|
||||
matching time. Note that setting the option at match time disables JIT
|
||||
matching.
|
||||
.sp
|
||||
PCRE2_NOTBOL
|
||||
|
@ -1880,13 +1880,13 @@ in the
|
|||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
page.
|
||||
page.
|
||||
.P
|
||||
If you know that your subject is valid, and you want to skip these checks for
|
||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
||||
\fBpcre2_match()\fP. You might want to do this for the second and subsequent
|
||||
calls to \fBpcre2_match()\fP if you are making repeated calls to find all the
|
||||
matches in a single subject string.
|
||||
matches in a single subject string.
|
||||
.P
|
||||
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
||||
as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your
|
||||
|
@ -1921,10 +1921,10 @@ documentation.
|
|||
.
|
||||
.SH "NEWLINE HANDLING WHEN MATCHING"
|
||||
.rs
|
||||
.sp
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
either a
|
||||
.sp
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
either a
|
||||
.\" HTML <a href="#compilecontext">
|
||||
.\" </a>
|
||||
compile context
|
||||
|
@ -1972,7 +1972,7 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern.
|
|||
.fi
|
||||
.P
|
||||
In general, a pattern matches a certain portion of the subject, and in
|
||||
addition, further substrings from the subject may be picked out by
|
||||
addition, further substrings from the subject may be picked out by
|
||||
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
||||
book, this is called "capturing" in what follows, and the phrase "capturing
|
||||
subpattern" is used for a fragment of a pattern that picks out a substring.
|
||||
|
@ -1982,14 +1982,14 @@ used to find out how many capturing subpatterns there are in a compiled
|
|||
pattern.
|
||||
.P
|
||||
The overall matched string and any captured substrings are returned to the
|
||||
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
|
||||
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
|
||||
contained within the
|
||||
.\" HTML <a href="#matchdatablock">
|
||||
.\" </a>
|
||||
match data block.
|
||||
.\"
|
||||
You can obtain direct access to the ovector by calling
|
||||
\fBpcre2_get_ovector_pointer()\fP to find its address, and
|
||||
You can obtain direct access to the ovector by calling
|
||||
\fBpcre2_get_ovector_pointer()\fP to find its address, and
|
||||
\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it
|
||||
contains. Alternatively, you can use the auxiliary functions for accessing
|
||||
captured substrings
|
||||
|
@ -2065,17 +2065,17 @@ had.
|
|||
.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
|
||||
.fi
|
||||
.P
|
||||
In addition to the offsets in the ovector, other information about a match is
|
||||
In addition to the offsets in the ovector, other information about a match is
|
||||
retained in the match data block and can be retrieved by the above functions.
|
||||
.P
|
||||
When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a
|
||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||
match or a partial match, as well as after a successful one.
|
||||
.P
|
||||
The offset of the character at which the successful match started is
|
||||
returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
|
||||
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
|
||||
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
|
||||
however, the \eK has no effect for a partial match.
|
||||
.
|
||||
.
|
||||
|
@ -2083,7 +2083,7 @@ however, the \eK has no effect for a partial match.
|
|||
.SS "Error return values from \fBpcre2_match()\fP"
|
||||
.rs
|
||||
.sp
|
||||
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
|
||||
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
|
||||
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
|
||||
error codes are also returned by other functions, and are documented with them.
|
||||
The codes are given names in the header file. If UTF checking is in force and
|
||||
|
@ -2237,7 +2237,7 @@ extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first
|
|||
argument is a pointer to the match data block, the second is the group number,
|
||||
and the third is a pointer to a variable into which the length is placed.
|
||||
.P
|
||||
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
|
||||
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
|
||||
supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into
|
||||
new memory, obtained using the same memory allocation function that was used
|
||||
for the match data block. The first two arguments of these functions are a
|
||||
|
@ -2250,10 +2250,10 @@ the buffer and a pointer to a variable that contains its length in code units.
|
|||
This is updated to contain the actual number of code units used, excluding the
|
||||
terminating zero.
|
||||
.P
|
||||
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
|
||||
to variables that are updated with a pointer to the new memory and the number
|
||||
of code units that comprise the substring, again excluding the terminating
|
||||
zero. When the substring is no longer needed, the memory should be freed by
|
||||
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
|
||||
to variables that are updated with a pointer to the new memory and the number
|
||||
of code units that comprise the substring, again excluding the terminating
|
||||
zero. When the substring is no longer needed, the memory should be freed by
|
||||
calling \fBpcre2_substring_free()\fP.
|
||||
.P
|
||||
The return value from these functions is zero for success, or one of these
|
||||
|
@ -2266,9 +2266,9 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP.
|
|||
.sp
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
.sp
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
small to capture that group.
|
||||
.
|
||||
.
|
||||
|
@ -2284,7 +2284,7 @@ small to capture that group.
|
|||
.P
|
||||
The \fBpcre2_substring_list_get()\fP function extracts all available substrings
|
||||
and builds a list of pointers to them, and a second list that contains their
|
||||
lengths (in code units), excluding a terminating zero that is added to each of
|
||||
lengths (in code units), excluding a terminating zero that is added to each of
|
||||
them. All this is done in a single block of memory that is obtained using the
|
||||
same memory allocation function that was used to get the match data block.
|
||||
.P
|
||||
|
@ -2295,7 +2295,7 @@ NULL pointer. The address of the list of lengths is returned via
|
|||
therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP
|
||||
argument to disable the creation of a list of lengths. The yield of the
|
||||
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
||||
could not be obtained. When the list is no longer needed, it should be freed by
|
||||
could not be obtained. When the list is no longer needed, it should be freed by
|
||||
calling \fBpcre2_substring_list_free()\fP.
|
||||
.P
|
||||
If this function encounters a substring that is unset, which can happen when
|
||||
|
@ -2340,7 +2340,7 @@ name.
|
|||
.P
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
functions described in the previous section. For convenience, there are also
|
||||
"byname" functions that correspond to the "bynumber" functions, the only
|
||||
"byname" functions that correspond to the "bynumber" functions, the only
|
||||
difference being that the second argument is a name instead of a number.
|
||||
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
||||
the behaviour may not be what you want (see the next section).
|
||||
|
@ -2413,7 +2413,7 @@ numbers, and hence the captured data.
|
|||
.sp
|
||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||
when it finds the first match, starting at a given point in the subject. If you
|
||||
want to find all possible matches, or the longest possible match at a given
|
||||
want to find all possible matches, or the longest possible match at a given
|
||||
position, consider using the alternative matching function (see below) instead.
|
||||
If you cannot use the alternative function, you can kludge it up by making use
|
||||
of the callout facility, which is described in the
|
||||
|
@ -2614,8 +2614,8 @@ fail, this error is given.
|
|||
.SH "SEE ALSO"
|
||||
.rs
|
||||
.sp
|
||||
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
|
||||
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
|
||||
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
|
||||
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
|
||||
\fBpcre2demo(3)\fP, \fBpcre2sample\fP(3), \fBpcre2stack\fP(3).
|
||||
.
|
||||
.
|
||||
|
|
|
@ -71,11 +71,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
|
|||
libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
|
||||
strings that are contained in vectors of 16-bit and 32-bit code units,
|
||||
respectively. These can be interpreted either as single-unit characters or
|
||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||
the following to the \fBconfigure\fP command:
|
||||
.sp
|
||||
--enable-pcre16
|
||||
--enable-pcre32
|
||||
--enable-pcre32
|
||||
.sp
|
||||
If you do not want the 8-bit library, add
|
||||
.sp
|
||||
|
@ -367,7 +367,7 @@ override this value by specifying a run-time option.
|
|||
If you add one of
|
||||
.sp
|
||||
--enable-pcre2test-libreadline
|
||||
--enable-pcre2test-libedit
|
||||
--enable-pcre2test-libedit
|
||||
.sp
|
||||
to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
|
||||
\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
|
||||
|
@ -384,8 +384,8 @@ unmodified distribution version of readline is in use), some extra
|
|||
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
|
||||
this:
|
||||
.sp
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
which link with readline the to choose an appropriate library."
|
||||
.sp
|
||||
If your environment has not been set up so that an appropriate library is
|
||||
|
|
|
@ -16,9 +16,9 @@ PCRE2 provides a feature called "callout", which is a means of temporarily
|
|||
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
||||
caller of PCRE2 provides an external function by putting its entry point in
|
||||
a match context (see \fBpcre2_set_callout()\fP) in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation).
|
||||
.P
|
||||
Within a regular expression, (?C) indicates the points at which the external
|
||||
|
|
|
@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
|||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
|
@ -56,8 +56,8 @@ the following line. */
|
|||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
such as pcre2_compile(). */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
|
|||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
|
@ -151,9 +151,9 @@ re = pcre2_compile(
|
|||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
|
||||
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
@ -197,7 +197,7 @@ if (rc < 0)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
|
@ -210,7 +210,7 @@ printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
|
|||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
|
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
|
|||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
|
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
|
|||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
@ -324,7 +324,7 @@ sequence. */
|
|||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
|
|
|
@ -48,10 +48,10 @@ performance, there is also a "fast path" API that is JIT-specific.
|
|||
.SH "SIMPLE USE OF JIT"
|
||||
.rs
|
||||
.sp
|
||||
To make use of the JIT support in the simplest way, all you have to do is to
|
||||
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
|
||||
\fBpcre2_compile()\fP. This function has two arguments: the first is the
|
||||
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
|
||||
To make use of the JIT support in the simplest way, all you have to do is to
|
||||
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
|
||||
\fBpcre2_compile()\fP. This function has two arguments: the first is the
|
||||
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
|
||||
second is a set of option bits, which must include at least one of
|
||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||
.P
|
||||
|
@ -221,7 +221,7 @@ non-default JIT stacks might operate:
|
|||
.sp
|
||||
All the functions described in this section do nothing if JIT is not available,
|
||||
and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument
|
||||
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
|
||||
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
|
||||
processed by \fBpcre2_jit_compile()\fP.
|
||||
.
|
||||
.
|
||||
|
@ -302,18 +302,18 @@ callback.
|
|||
.sp
|
||||
int rc;
|
||||
pcre2_code *re;
|
||||
pcre2_match_data *match_data;
|
||||
pcre2_match_data *match_data;
|
||||
pcre2_jit_stack *jit_stack;
|
||||
.sp
|
||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||
&errornumber, &erroffset, NULL);
|
||||
/* Check for errors */
|
||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||
/* Check for errors */
|
||||
/* Check for errors */
|
||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||
/* Check for error (NULL) */
|
||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
match_data = pcre2_match_data_create(re, 10);
|
||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
||||
/* Check results */
|
||||
pcre2_free(re);
|
||||
|
|
|
@ -64,15 +64,15 @@ matched; without such a restriction there would always be a partial match of an
|
|||
empty string at the end of the subject.
|
||||
.P
|
||||
When a partial match is returned, the first two elements in the ovector point
|
||||
to the portion of the subject that was matched. The appearance of \eK in the
|
||||
to the portion of the subject that was matched. The appearance of \eK in the
|
||||
pattern has no effect for a partial match. Consider this pattern:
|
||||
.sp
|
||||
/abc\eK123/
|
||||
.sp
|
||||
If it is matched against "456abc123xyz" the result is a complete match, and the
|
||||
ovector defines the matched string as "123", because \eK resets the "start of
|
||||
match" point. However, if a partial match is requested and the subject string
|
||||
is "456abc12", a partial match is found for the string "abc12", because all
|
||||
ovector defines the matched string as "123", because \eK resets the "start of
|
||||
match" point. However, if a partial match is requested and the subject string
|
||||
is "456abc12", a partial match is found for the string "abc12", because all
|
||||
these characters are needed for a subsequent re-match with additional
|
||||
characters.
|
||||
.P
|
||||
|
@ -316,14 +316,14 @@ same point as before.
|
|||
For example, if the pattern "(?<=123)abc" is partially matched against the
|
||||
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
||||
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
||||
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
|
||||
displays a partial match, it indicates the lookbehind characters with '<'
|
||||
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
|
||||
displays a partial match, it indicates the lookbehind characters with '<'
|
||||
characters:
|
||||
.sp
|
||||
re> "(?<=123)abc"
|
||||
data> xx123ab\e=ph
|
||||
Partial match: 123ab
|
||||
<<<
|
||||
<<<
|
||||
.P
|
||||
3. Because a partial match must always contain at least one character, what
|
||||
might be considered a partial match of an empty string actually gives a "no
|
||||
|
|
|
@ -118,7 +118,7 @@ page has
|
|||
.\" </a>
|
||||
further discussion
|
||||
.\"
|
||||
about newlines, and shows how to set the newline convention when calling
|
||||
about newlines, and shows how to set the newline convention when calling
|
||||
\fBpcre2_compile()\fP.
|
||||
.P
|
||||
It is also possible to specify a newline convention by starting a pattern
|
||||
|
@ -196,7 +196,7 @@ corresponding characters in the subject. As a trivial example, the pattern
|
|||
.sp
|
||||
matches a portion of a subject string that is identical to itself. When
|
||||
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
||||
independently of case.
|
||||
independently of case.
|
||||
.P
|
||||
The power of regular expressions comes from the ability to include alternatives
|
||||
and repetitions in the pattern. These are encoded in the pattern by the use of
|
||||
|
@ -1199,8 +1199,8 @@ An opening square bracket introduces a character class, terminated by a closing
|
|||
square bracket. A closing square bracket on its own is not special by default.
|
||||
If a closing square bracket is required as a member of the class, it should be
|
||||
the first data character in the class (after an initial circumflex, if present)
|
||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||
square bracket at the start does end the (empty) class.
|
||||
.P
|
||||
A character class matches a single character in the subject. A matched
|
||||
|
@ -1221,7 +1221,7 @@ string.
|
|||
When caseless matching is set, any letters in a class represent both their
|
||||
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
||||
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
||||
caseful version would.
|
||||
caseful version would.
|
||||
.P
|
||||
Characters that might indicate line breaks are never treated in any special way
|
||||
when matching character classes, whatever line-ending sequence is in use, and
|
||||
|
@ -1340,7 +1340,7 @@ classes by other sequences, as follows:
|
|||
[:alnum:] becomes \ep{Xan}
|
||||
[:alpha:] becomes \ep{L}
|
||||
[:blank:] becomes \eh
|
||||
[:cntrl:] becomes \ep{Cc}
|
||||
[:cntrl:] becomes \ep{Cc}
|
||||
[:digit:] becomes \ep{Nd}
|
||||
[:lower:] becomes \ep{Ll}
|
||||
[:space:] becomes \ep{Xps}
|
||||
|
@ -1496,7 +1496,7 @@ match "cataract", "erpillar" or an empty string.
|
|||
.sp
|
||||
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
||||
the whole pattern matches, the portion of the subject string that matched the
|
||||
subpattern is passed back to the caller, separately from the portion that
|
||||
subpattern is passed back to the caller, separately from the portion that
|
||||
matched the whole pattern. (This applies only to the traditional matching
|
||||
function; the DFA matching function does not support capturing.)
|
||||
.P
|
||||
|
@ -1916,7 +1916,7 @@ at release 5.10.
|
|||
PCRE2 has an optimization that automatically "possessifies" certain simple
|
||||
pattern constructs. For example, the sequence A+B is treated as A++B because
|
||||
there is no point in backtracking into a sequence of A's when B must follow.
|
||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||
the pattern with (*NO_AUTO_POSSESS).
|
||||
.P
|
||||
When a pattern contains an unlimited repeat inside a subpattern that can itself
|
||||
|
@ -2238,7 +2238,7 @@ if the pattern is written as
|
|||
.sp
|
||||
^.*+(?<=abcd)
|
||||
.sp
|
||||
there can be no backtracking for the .*+ item because of the possessive
|
||||
there can be no backtracking for the .*+ item because of the possessive
|
||||
quantifier; it can match only the entire string. The subsequent lookbehind
|
||||
assertion does a single test on the last four characters. If it fails, the
|
||||
match fails immediately. For long strings, this approach makes a significant
|
||||
|
@ -2754,8 +2754,8 @@ same pair of parentheses when there is a repetition.
|
|||
.P
|
||||
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
||||
code. The feature is called "callout". The caller of PCRE2 provides an external
|
||||
function by putting its entry point in a match context using the function
|
||||
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
|
||||
function by putting its entry point in a match context using the function
|
||||
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
|
||||
\fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry
|
||||
point is set to NULL, callouts are disabled.
|
||||
.P
|
||||
|
@ -3008,7 +3008,7 @@ output from \fBpcre2test\fP:
|
|||
re> /(*COMMIT)abc/
|
||||
data> xyzabc
|
||||
0: abc
|
||||
data>
|
||||
data>
|
||||
re> /(*COMMIT)abc/no_start_optimize
|
||||
data> xyzabc
|
||||
No match
|
||||
|
@ -3035,7 +3035,7 @@ as (*COMMIT).
|
|||
.P
|
||||
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
ignoring those set by (*PRUNE) or (*THEN).
|
||||
.sp
|
||||
(*SKIP)
|
||||
|
@ -3085,7 +3085,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
|
|||
.P
|
||||
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||
ignoring those set by (*PRUNE) and (*THEN).
|
||||
.P
|
||||
A subpattern that does not contain a | character is just a part of the
|
||||
|
|
|
@ -90,10 +90,10 @@ of how to do this are given in the
|
|||
\fBpcre2build\fP
|
||||
.\"
|
||||
documentation. When built in this way, instead of using the stack, PCRE2
|
||||
gets memory for remembering backup points from the heap. By default, the memory
|
||||
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
|
||||
to supply your own memory management function. For details, see the section
|
||||
entitled
|
||||
gets memory for remembering backup points from the heap. By default, the memory
|
||||
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
|
||||
to supply your own memory management function. For details, see the section
|
||||
entitled
|
||||
.\" HTML <a href="pcre2api.html#matchcontext">
|
||||
.\" </a>
|
||||
"The match context"
|
||||
|
@ -104,8 +104,8 @@ in the
|
|||
.\"
|
||||
documentation. Since the block sizes are always the same, it may be possible to
|
||||
implement a customized memory handler that is more efficient than the standard
|
||||
function. The memory blocks obtained for this purpose are retained and re-used
|
||||
if possible while \fBpcre2_match()\fP is running. They are all freed just
|
||||
function. The memory blocks obtained for this purpose are retained and re-used
|
||||
if possible while \fBpcre2_match()\fP is running. They are all freed just
|
||||
before it exits.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -387,7 +387,7 @@ appear.
|
|||
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||
(*UTF) set appropriate UTF mode for the library in use
|
||||
|
|
|
@ -433,7 +433,7 @@ about the pattern:
|
|||
/I info show info about compiled pattern
|
||||
hex pattern is coded in hexadecimal
|
||||
jit[=<number>] use JIT
|
||||
jitverify verify JIT use
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
|
@ -518,7 +518,7 @@ number in the range 0 to 7:
|
|||
7 all three modes
|
||||
.sp
|
||||
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
||||
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
|
||||
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
|
||||
for the appropriate type of match, except when incompatible run-time options
|
||||
are specified. For more details, see the
|
||||
.\" HREF
|
||||
|
@ -670,7 +670,7 @@ for a description of their effects.
|
|||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||
.sp
|
||||
The partial matching modifiers are provided with abbreviations because they
|
||||
The partial matching modifiers are provided with abbreviations because they
|
||||
appear frequently in tests.
|
||||
.P
|
||||
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
|
||||
|
@ -844,8 +844,8 @@ context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_recursion_limit()\fP
|
|||
until it finds the minimum values for each parameter that allow
|
||||
\fBpcre2_match()\fP to complete without error.
|
||||
.P
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||
warning message).
|
||||
.P
|
||||
The \fImatch_limit\fP number is a measure of the amount of backtracking
|
||||
|
@ -890,10 +890,10 @@ appears, though of course it can also be used to set a default in a
|
|||
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
||||
available for storing matching information. The default is 15.
|
||||
.P
|
||||
At least one pair of offsets is always created by
|
||||
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
|
||||
At least one pair of offsets is always created by
|
||||
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
|
||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
|
||||
vector.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -57,7 +57,7 @@ individual code units.
|
|||
In UTF modes, the dot metacharacter matches one UTF character instead of a
|
||||
single code unit.
|
||||
.P
|
||||
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
||||
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
||||
but its use can lead to some strange effects because it breaks up multi-unit
|
||||
characters (see the description of \eC in the
|
||||
.\" HREF
|
||||
|
@ -107,8 +107,8 @@ case-equivalent, and these are treated as such.
|
|||
.rs
|
||||
.sp
|
||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
are (by default) checked for validity on entry to the relevant functions.
|
||||
If an invalid UTF string is passed, an error return is given.
|
||||
.P
|
||||
UTF-16 and UTF-32 strings can indicate their endianness by special code known
|
||||
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||
|
|
12
perltest.pl
12
perltest.pl
|
@ -82,13 +82,13 @@ for (;;)
|
|||
|
||||
chomp($pattern);
|
||||
$pattern =~ s/\s+$//;
|
||||
|
||||
|
||||
# Split the pattern from the modifiers and adjust them as necessary.
|
||||
|
||||
|
||||
$pattern =~ /^\s*((.).*\2)(.*)$/s;
|
||||
$pat = $1;
|
||||
$mod = $3;
|
||||
|
||||
|
||||
# The private "aftertext" modifier means "print $' afterwards".
|
||||
|
||||
$showrest = ($mod =~ s/aftertext,?//);
|
||||
|
@ -131,9 +131,9 @@ for (;;)
|
|||
for (;;)
|
||||
{
|
||||
last if ! ($_ = <$infile>);
|
||||
last if $_ =~ /^\s*$/;
|
||||
}
|
||||
}
|
||||
last if $_ =~ /^\s*$/;
|
||||
}
|
||||
}
|
||||
next NEXT_RE;
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* This is a freestanding support program to generate a file containing
|
||||
character tables for PCRE2. The tables are built according to the current
|
||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
|
|
|
@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
-----------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* This module contains functions that scan a compiled pattern and change
|
||||
/* This module contains functions that scan a compiled pattern and change
|
||||
repeats into possessive repeats where possible. */
|
||||
|
||||
|
||||
|
@ -359,8 +359,8 @@ Returns: points to the start of the next opcode if *code is accepted
|
|||
NULL if *code is not accepted
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
|
||||
static PCRE2_SPTR
|
||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
|
||||
uint32_t *list)
|
||||
{
|
||||
PCRE2_UCHAR c = *code;
|
||||
|
@ -387,7 +387,7 @@ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
|||
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
||||
code += IMM2_SIZE;
|
||||
|
||||
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||
c != OP_POSPLUS);
|
||||
|
||||
switch(base)
|
||||
|
@ -595,7 +595,7 @@ for(;;)
|
|||
Therefore infinite recursions are not possible. */
|
||||
|
||||
c = *code;
|
||||
|
||||
|
||||
/* Skip over callouts */
|
||||
|
||||
if (c == OP_CALLOUT)
|
||||
|
@ -624,7 +624,7 @@ for(;;)
|
|||
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
|
||||
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
|
||||
cannot be converted to a possessive form. */
|
||||
|
||||
|
||||
if (base_list[1] == 0) return FALSE;
|
||||
|
||||
switch(*(code - GET(code, 1)))
|
||||
|
@ -636,7 +636,7 @@ for(;;)
|
|||
case OP_ONCE:
|
||||
case OP_ONCE_NC:
|
||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||
last iterator. However, if the group was entered as a result of checking
|
||||
last iterator. However, if the group was entered as a result of checking
|
||||
a previous iterator, this is not possible. */
|
||||
|
||||
return !entered_a_group;
|
||||
|
@ -672,7 +672,7 @@ for(;;)
|
|||
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
||||
|
||||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||
|
||||
|
||||
next_code += 1 + LINK_SIZE;
|
||||
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
|
||||
return FALSE;
|
||||
|
@ -681,14 +681,14 @@ for(;;)
|
|||
continue;
|
||||
|
||||
default:
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for a supported opcode, and load its properties. */
|
||||
|
||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||
if (code == NULL) return FALSE; /* Unsupported */
|
||||
|
||||
|
||||
/* If either opcode is a small character list, set pointers for comparing
|
||||
characters from that list with another list, or with a property. */
|
||||
|
||||
|
@ -778,7 +778,7 @@ for(;;)
|
|||
|
||||
/* Because the bit sets are unaligned bytes, we need to perform byte
|
||||
comparison here. */
|
||||
|
||||
|
||||
set_end = set1 + 32;
|
||||
if (invert_bits)
|
||||
{
|
||||
|
@ -922,7 +922,7 @@ for(;;)
|
|||
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
||||
|
||||
if (!accepted) return FALSE;
|
||||
|
||||
|
||||
if (list[1] == 0) return TRUE;
|
||||
/* Might be an empty repeat. */
|
||||
continue;
|
||||
|
@ -1093,8 +1093,8 @@ but some compilers complain about an unreachable statement. */
|
|||
if appropriate. This function modifies the compiled opcode!
|
||||
|
||||
Arguments:
|
||||
code points to start of the byte code
|
||||
utf TRUE in UTF mode
|
||||
code points to start of the byte code
|
||||
utf TRUE in UTF mode
|
||||
cb compile data block
|
||||
|
||||
Returns: nothing
|
||||
|
@ -1111,7 +1111,7 @@ uint32_t list[8];
|
|||
for (;;)
|
||||
{
|
||||
c = *code;
|
||||
|
||||
|
||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||
{
|
||||
c -= get_repeat_base(c) - OP_STAR;
|
||||
|
@ -1244,7 +1244,7 @@ for (;;)
|
|||
}
|
||||
|
||||
/* Add in the fixed length from the table */
|
||||
|
||||
|
||||
code += PRIV(OP_lengths)[c];
|
||||
|
||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||
|
|
|
@ -594,7 +594,7 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
||||
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
||||
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
|
||||
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
||||
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
||||
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||
|
@ -675,12 +675,12 @@ static const uint8_t opcode_possessify[] = {
|
|||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_code_free(pcre2_code *code)
|
||||
{
|
||||
if (code != NULL)
|
||||
if (code != NULL)
|
||||
{
|
||||
if (code->executable_jit != NULL)
|
||||
PRIV(jit_free)(code->executable_jit, &code->memctl);
|
||||
code->memctl.free(code, code->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -4462,7 +4462,7 @@ for (;; ptr++)
|
|||
syntax, so we just ignore the repeat. */
|
||||
|
||||
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
|
||||
previous[GET(previous, 1)] != OP_ALT)
|
||||
previous[GET(previous, 1)] != OP_ALT)
|
||||
goto END_REPEAT;
|
||||
|
||||
/* There is no sense in actually repeating assertions. The only potential
|
||||
|
@ -5169,64 +5169,64 @@ for (;; ptr++)
|
|||
namelen = -1; /* => not a name; must set to avoid warning */
|
||||
name = NULL; /* Always set to avoid warning */
|
||||
recno = 0; /* Always set to avoid warning */
|
||||
|
||||
|
||||
/* Point at character after (?( */
|
||||
|
||||
|
||||
ptr++;
|
||||
|
||||
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
|
||||
users of PCRE2 via an application can discover which release of PCRE2
|
||||
users of PCRE2 via an application can discover which release of PCRE2
|
||||
is being used. */
|
||||
|
||||
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
||||
|
||||
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
||||
ptr[7] != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
BOOL ge = FALSE;
|
||||
BOOL ge = FALSE;
|
||||
int major = 0;
|
||||
int minor = 0;
|
||||
|
||||
|
||||
ptr += 7;
|
||||
if (*ptr == CHAR_GREATER_THAN_SIGN)
|
||||
{
|
||||
ge = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
|
||||
references its argument twice. */
|
||||
|
||||
|
||||
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
|
||||
{
|
||||
{
|
||||
*errorcodeptr = ERR79;
|
||||
goto FAILED;
|
||||
}
|
||||
goto FAILED;
|
||||
}
|
||||
|
||||
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
|
||||
if (*ptr == CHAR_DOT)
|
||||
{
|
||||
ptr++;
|
||||
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
||||
}
|
||||
ptr++;
|
||||
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
||||
}
|
||||
|
||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
{
|
||||
*errorcodeptr = ERR79;
|
||||
goto FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
if (ge)
|
||||
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
|
||||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
|
||||
OP_TRUE : OP_FALSE;
|
||||
else
|
||||
else
|
||||
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
|
||||
OP_TRUE : OP_FALSE;
|
||||
|
||||
|
||||
ptr++;
|
||||
skipbytes = 1;
|
||||
break; /* End of condition processing */
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Check for a test for recursion in a named group. */
|
||||
|
||||
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
||||
|
@ -5404,8 +5404,8 @@ for (;; ptr++)
|
|||
}
|
||||
|
||||
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
||||
false. During compilation we set OP_DEFINE to distinguish this from
|
||||
other OP_FALSE conditions so that it can be checked for having only one
|
||||
false. During compilation we set OP_DEFINE to distinguish this from
|
||||
other OP_FALSE conditions so that it can be checked for having only one
|
||||
branch, but after that the opcode is changed to OP_FALSE. */
|
||||
|
||||
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
||||
|
@ -6133,7 +6133,7 @@ for (;; ptr++)
|
|||
while (*tc != OP_KET);
|
||||
|
||||
/* A DEFINE group is never obeyed inline (the "condition" is always
|
||||
false). It must have only one branch. Having checked this, change the
|
||||
false). It must have only one branch. Having checked this, change the
|
||||
opcode to OP_FALSE. */
|
||||
|
||||
if (code[LINK_SIZE+1] == OP_DEFINE)
|
||||
|
@ -6143,7 +6143,7 @@ for (;; ptr++)
|
|||
*errorcodeptr = ERR54;
|
||||
goto FAILED;
|
||||
}
|
||||
code[LINK_SIZE+1] = OP_FALSE;
|
||||
code[LINK_SIZE+1] = OP_FALSE;
|
||||
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
|
||||
}
|
||||
|
||||
|
@ -6219,7 +6219,7 @@ for (;; ptr++)
|
|||
than one can replicate it as reqcu if necessary. If the subpattern has
|
||||
no firstcu, set "none" for the whole branch. In both cases, a zero
|
||||
repeat forces firstcu to "none". */
|
||||
|
||||
|
||||
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
||||
{
|
||||
if (subfirstcuflags >= 0)
|
||||
|
@ -6759,7 +6759,7 @@ for (;;)
|
|||
reqcu = firstcu;
|
||||
reqcuflags = firstcuflags;
|
||||
}
|
||||
}
|
||||
}
|
||||
firstcuflags = REQ_NONE;
|
||||
}
|
||||
|
||||
|
@ -7389,12 +7389,12 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
|
|||
|
||||
/* A NULL compile context means "use a default context" */
|
||||
|
||||
if (ccontext == NULL)
|
||||
if (ccontext == NULL)
|
||||
ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
|
||||
|
||||
/* A zero-terminated pattern is indicated by the special length value
|
||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||
to ensure that it is always possible to look one code unit beyond the end of
|
||||
/* A zero-terminated pattern is indicated by the special length value
|
||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||
to ensure that it is always possible to look one code unit beyond the end of
|
||||
the pattern's characters. */
|
||||
|
||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
||||
|
@ -7481,19 +7481,19 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
|||
case PSO_OPT:
|
||||
cb.external_options |= p->value;
|
||||
break;
|
||||
|
||||
|
||||
case PSO_FLG:
|
||||
setflags |= p->value;
|
||||
break;
|
||||
break;
|
||||
|
||||
case PSO_NL:
|
||||
newline = p->value;
|
||||
setflags |= PCRE2_NL_SET;
|
||||
setflags |= PCRE2_NL_SET;
|
||||
break;
|
||||
|
||||
case PSO_BSR:
|
||||
bsr = p->value;
|
||||
setflags |= PCRE2_BSR_SET;
|
||||
setflags |= PCRE2_BSR_SET;
|
||||
break;
|
||||
|
||||
case PSO_LIMM:
|
||||
|
@ -7883,8 +7883,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
|
||||
/* If the pattern is still not anchored and we do not have a first code unit,
|
||||
see if there is one that is asserted (these are not saved during the compile
|
||||
because they can cause conflicts with actual literals that follow). This code
|
||||
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
|
||||
because they can cause conflicts with actual literals that follow). This code
|
||||
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
|
||||
create will not be used. */
|
||||
|
||||
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
||||
|
@ -7930,7 +7930,7 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
|||
}
|
||||
|
||||
/* Handle the "required code unit", if one is set. In the case of an anchored
|
||||
pattern, do this only if it follows a variable length item in the pattern.
|
||||
pattern, do this only if it follows a variable length item in the pattern.
|
||||
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
|
||||
|
||||
if (reqcuflags >= 0 &&
|
||||
|
@ -7973,7 +7973,7 @@ while (*codestart == OP_ALT);
|
|||
to set up information such as a bitmap of starting code units and a minimum
|
||||
matching length. */
|
||||
|
||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||
PRIV(study)(re) != 0)
|
||||
{
|
||||
errorcode = ERR31;
|
||||
|
|
|
@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "config.h"
|
||||
#endif
|
||||
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||
its value gets changed by pcre2_internal.h to be in code units. */
|
||||
|
||||
static int configured_link_size = LINK_SIZE;
|
||||
|
@ -69,7 +69,7 @@ Arguments:
|
|||
Returns: 0 if data returned
|
||||
>= 0 if where is NULL, giving length required
|
||||
PCRE2_ERROR_BADOPTION if "where" not recognized
|
||||
or JIT target requested when JIT not enabled
|
||||
or JIT target requested when JIT not enabled
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -80,33 +80,33 @@ if (where == NULL) /* Requests a length */
|
|||
switch(what)
|
||||
{
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_NEWLINE:
|
||||
case PCRE2_CONFIG_STACKRECURSE:
|
||||
case PCRE2_CONFIG_UNICODE:
|
||||
return sizeof(int);
|
||||
|
||||
return sizeof(int);
|
||||
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
case PCRE2_CONFIG_PARENSLIMIT:
|
||||
case PCRE2_CONFIG_RECURSIONLIMIT:
|
||||
return sizeof(long int);
|
||||
|
||||
|
||||
/* These are handled below */
|
||||
|
||||
|
||||
case PCRE2_CONFIG_JITTARGET:
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch (what)
|
||||
{
|
||||
default:
|
||||
default:
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
|
@ -129,9 +129,9 @@ switch (what)
|
|||
#ifdef SUPPORT_JIT
|
||||
{
|
||||
const char *v = PRIV(jit_get_target)();
|
||||
return (where == NULL)? (int)strlen(v) :
|
||||
return (where == NULL)? (int)strlen(v) :
|
||||
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
||||
}
|
||||
}
|
||||
#else
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
#endif
|
||||
|
@ -163,9 +163,9 @@ switch (what)
|
|||
*((int *)where) = 1;
|
||||
#endif
|
||||
break;
|
||||
|
||||
|
||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||
{
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
const char *v = PRIV(unicode_version);
|
||||
#else
|
||||
|
@ -183,15 +183,15 @@ switch (what)
|
|||
*((int *)where) = 0;
|
||||
#endif
|
||||
break;
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
|
||||
/* The hackery in setting "v" below is to cope with the case when
|
||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
If the second alternative is used in this case, it does not leave a space
|
||||
before the date. On the other hand, if all four macros are put into a single
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||
There are problems using an "obvious" approach like this:
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||
|
||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||
|
||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||
|
@ -199,18 +199,18 @@ switch (what)
|
|||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||
turns out that gcc treats this case as a single empty string - which is what
|
||||
we really want - but Visual C grumbles about the lack of an argument for the
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||
way to test for a macro's value being empty at compile time, we have to
|
||||
resort to a runtime test. */
|
||||
|
||||
|
||||
case PCRE2_CONFIG_VERSION:
|
||||
{
|
||||
{
|
||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||
return (where == NULL)? (int)strlen(v) :
|
||||
return (where == NULL)? (int)strlen(v) :
|
||||
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -72,15 +72,15 @@ free(block);
|
|||
* Get a block and save memory control *
|
||||
*************************************************/
|
||||
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
/* This internal function is called to get a block of memory in which the
|
||||
memory control data is to be stored at the start for future use.
|
||||
|
||||
Arguments:
|
||||
size amount of memory required
|
||||
memctl pointer to a memctl block or NULL
|
||||
|
||||
|
||||
Returns: pointer to memory or NULL on failure
|
||||
*/
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN void *
|
||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||
|
@ -88,7 +88,7 @@ PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
|||
pcre2_memctl *newmemctl;
|
||||
void *yield = (memctl == NULL)? malloc(size) :
|
||||
memctl->malloc(size, memctl->memory_data);
|
||||
if (yield == NULL) return NULL;
|
||||
if (yield == NULL) return NULL;
|
||||
newmemctl = (pcre2_memctl *)yield;
|
||||
if (memctl == NULL)
|
||||
{
|
||||
|
@ -96,9 +96,9 @@ if (memctl == NULL)
|
|||
newmemctl->free = default_free;
|
||||
newmemctl->memory_data = NULL;
|
||||
}
|
||||
else *newmemctl = *memctl;
|
||||
else *newmemctl = *memctl;
|
||||
return yield;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -108,11 +108,11 @@ return yield;
|
|||
|
||||
/* Initializing for compile and match contexts is done in separate, private
|
||||
functions so that these can be called from functions such as pcre2_compile()
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
when an external context is not supplied. The initializing functions have an
|
||||
option to set up default memory management. */
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
|
||||
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
|
||||
void (*private_free)(void *, void *), void *memory_data)
|
||||
{
|
||||
pcre2_general_context *gcontext;
|
||||
|
@ -121,7 +121,7 @@ if (private_free == NULL) private_free = default_free;
|
|||
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
|
||||
if (gcontext == NULL) return NULL;
|
||||
gcontext->memctl.malloc = private_malloc;
|
||||
gcontext->memctl.free = private_free;
|
||||
gcontext->memctl.free = private_free;
|
||||
gcontext->memctl.memory_data = memory_data;
|
||||
return gcontext;
|
||||
}
|
||||
|
@ -136,7 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = {
|
|||
PRIV(default_tables),
|
||||
BSR_DEFAULT,
|
||||
NEWLINE_DEFAULT,
|
||||
PARENS_NEST_LIMIT };
|
||||
PARENS_NEST_LIMIT };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
@ -145,8 +145,8 @@ PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
|||
pcre2_compile_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
||||
if (ccontext == NULL) return NULL;
|
||||
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
||||
if (ccontext == NULL) return NULL;
|
||||
*ccontext = PRIV(default_compile_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
||||
|
@ -159,14 +159,14 @@ when no context is supplied to a match function. */
|
|||
|
||||
const pcre2_match_context PRIV(default_match_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
{ default_malloc, default_free, NULL },
|
||||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_RECURSION };
|
||||
|
||||
MATCH_LIMIT_RECURSION };
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
||||
|
@ -174,8 +174,8 @@ PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
|||
pcre2_match_context_create(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
|
||||
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
|
||||
if (mcontext == NULL) return NULL;
|
||||
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
|
||||
if (mcontext == NULL) return NULL;
|
||||
*mcontext = PRIV(default_match_context);
|
||||
if (gcontext != NULL)
|
||||
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
|
||||
|
@ -190,8 +190,8 @@ return mcontext;
|
|||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_general_context_copy(pcre2_general_context *gcontext)
|
||||
{
|
||||
pcre2_general_context *new =
|
||||
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
|
||||
pcre2_general_context *new =
|
||||
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
|
||||
gcontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
|
||||
|
@ -202,8 +202,8 @@ return new;
|
|||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
|
||||
{
|
||||
pcre2_compile_context *new =
|
||||
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
|
||||
pcre2_compile_context *new =
|
||||
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
|
||||
ccontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
|
||||
|
@ -214,8 +214,8 @@ return new;
|
|||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_context_copy(pcre2_match_context *mcontext)
|
||||
{
|
||||
pcre2_match_context *new =
|
||||
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
|
||||
pcre2_match_context *new =
|
||||
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
|
||||
mcontext->memctl.memory_data);
|
||||
if (new == NULL) return NULL;
|
||||
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
|
||||
|
@ -267,14 +267,14 @@ data. */
|
|||
/* ------------ Compile contexts ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables)
|
||||
{
|
||||
ccontext->tables = tables;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||
{
|
||||
switch(value)
|
||||
|
@ -283,13 +283,13 @@ switch(value)
|
|||
case PCRE2_BSR_UNICODE:
|
||||
ccontext->bsr_convention = value;
|
||||
return 0;
|
||||
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
switch(newline)
|
||||
|
@ -301,10 +301,10 @@ switch(newline)
|
|||
case PCRE2_NEWLINE_ANYCRLF:
|
||||
ccontext->newline_convention = newline;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -315,7 +315,7 @@ return 0;
|
|||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||
int (*guard)(uint32_t))
|
||||
{
|
||||
ccontext->stack_guard = guard;
|
||||
|
@ -325,8 +325,8 @@ return 0;
|
|||
|
||||
/* ------------ Match contexts ------------ */
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||
int (*callout)(pcre2_callout_block *), void *callout_data)
|
||||
{
|
||||
mcontext->callout = callout;
|
||||
|
@ -349,8 +349,8 @@ return 0;
|
|||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||
void *mydata)
|
||||
{
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
|
@ -364,6 +364,6 @@ mcontext->stack_memctl.memory_data = mydata;
|
|||
(void)mydata;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of pcre2_context.c */
|
||||
|
|
|
@ -376,7 +376,7 @@ stateblock *next_active_state, *next_new_state;
|
|||
|
||||
const uint8_t *ctypes, *lcc, *fcc;
|
||||
PCRE2_SPTR ptr;
|
||||
PCRE2_SPTR end_code;
|
||||
PCRE2_SPTR end_code;
|
||||
PCRE2_SPTR first_op;
|
||||
|
||||
dfa_recursion_info new_recursive;
|
||||
|
@ -542,8 +542,8 @@ for (;;)
|
|||
BOOL partial_newline = FALSE;
|
||||
BOOL could_continue = reset_could_continue;
|
||||
reset_could_continue = FALSE;
|
||||
|
||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||
|
||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||
|
||||
/* Make the new state list into the active state list and empty the
|
||||
new state list. */
|
||||
|
@ -633,7 +633,7 @@ for (;;)
|
|||
|
||||
/* If this opcode inspects a character, but we are at the end of the
|
||||
subject, remember the fact for use when testing for a partial match. */
|
||||
|
||||
|
||||
if (clen == 0 && poptable[codevalue] != 0)
|
||||
could_continue = TRUE;
|
||||
|
||||
|
@ -975,7 +975,7 @@ for (;;)
|
|||
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
||||
#endif
|
||||
mb->last_used_ptr = temp;
|
||||
}
|
||||
}
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
|
@ -2643,7 +2643,7 @@ for (;;)
|
|||
|
||||
if (condcode == OP_FALSE)
|
||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||
|
||||
|
||||
/* There is also an always-true condition */
|
||||
|
||||
if (condcode == OP_TRUE)
|
||||
|
@ -2999,7 +2999,7 @@ for (;;)
|
|||
|
||||
The "could_continue" variable is true if a state could have continued but
|
||||
for the fact that the end of the subject was reached. */
|
||||
|
||||
|
||||
if (new_count <= 0)
|
||||
{
|
||||
if (rlevel == 1 && /* Top level, and */
|
||||
|
@ -3098,7 +3098,7 @@ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
|||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
@ -3127,19 +3127,19 @@ with different endianness. */
|
|||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
#undef FF
|
||||
|
@ -3168,7 +3168,7 @@ end_subject = subject + length;
|
|||
req_cu_ptr = start_match - 1;
|
||||
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
||||
(re->overall_options & PCRE2_ANCHORED) != 0;
|
||||
|
||||
|
||||
/* The "must be at the start of a line" flags are used in a loop when finding
|
||||
where to start. */
|
||||
|
||||
|
@ -3307,7 +3307,7 @@ for (;;)
|
|||
/* There are some optimizations that avoid running the match if a known
|
||||
starting point is not found, or if a known later code unit is not present.
|
||||
However, there is an option (settable at compile time) that disables
|
||||
these, for testing and for ensuring that all callouts do actually occur.
|
||||
these, for testing and for ensuring that all callouts do actually occur.
|
||||
The optimizations must also be avoided when restarting a DFA match. */
|
||||
|
||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||
|
@ -3493,7 +3493,7 @@ for (;;)
|
|||
|
||||
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||
on only if not anchored. */
|
||||
|
||||
|
||||
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
||||
{
|
||||
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||
|
@ -3504,7 +3504,7 @@ for (;;)
|
|||
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||
match_data->rightchar = mb->last_used_ptr - subject;
|
||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||
match_data->rc = rc;
|
||||
match_data->rc = rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define STRING(a) # a
|
||||
#define XSTRING(s) STRING(s)
|
||||
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||
at COMPILE_ERROR_BASE (100).
|
||||
|
||||
Do not ever re-use any error number, because they are documented. Always add a
|
||||
|
@ -101,7 +101,7 @@ static const char compile_error_texts[] =
|
|||
"(?R or (?[+-]digits must be followed by )\0"
|
||||
/* 30 */
|
||||
"unknown POSIX class name\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"internal error in pcre2_study(): should not occur\0"
|
||||
"this version of PCRE does not have UTF or Unicode property support\0"
|
||||
"parentheses are too deeply nested (stack check)\0"
|
||||
"character code point value in \\x{} or \\o{} is too large\0"
|
||||
|
@ -158,94 +158,94 @@ static const char compile_error_texts[] =
|
|||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||
"character code point value in \\u.... sequence is too large\0"
|
||||
"digits missing in \\x{} or \\o{}\0"
|
||||
"syntax error in (?(VERSION condition\0"
|
||||
"syntax error in (?(VERSION condition\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
||||
static const char match_error_texts[] =
|
||||
"no error\0"
|
||||
"no match\0"
|
||||
"no match\0"
|
||||
"partial match\0"
|
||||
"UTF-8 error: 1 byte missing at end\0"
|
||||
"UTF-8 error: 2 bytes missing at end\0"
|
||||
/* 5 */
|
||||
/* 5 */
|
||||
"UTF-8 error: 3 bytes missing at end\0"
|
||||
"UTF-8 error: 4 bytes missing at end\0"
|
||||
"UTF-8 error: 5 bytes missing at end\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||
/* 10 */
|
||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||
/* 15 */
|
||||
/* 15 */
|
||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-8 error: overlong 2-byte sequence\0"
|
||||
"UTF-8 error: overlong 3-byte sequence\0"
|
||||
"UTF-8 error: overlong 4-byte sequence\0"
|
||||
/* 20 */
|
||||
/* 20 */
|
||||
"UTF-8 error: overlong 5-byte sequence\0"
|
||||
"UTF-8 error: overlong 6-byte sequence\0"
|
||||
"UTF-8 error: isolated 0x80 byte\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||
"UTF-16 error: missing low surrogate at end\0"
|
||||
/* 25 */
|
||||
"UTF-16 error: invalid low surrogate\0"
|
||||
"UTF-16 error: isolated low surrogate\0"
|
||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||
"bad data value\0"
|
||||
/* 30 */
|
||||
/* 30 */
|
||||
"bad length\0"
|
||||
"magic number missing\0"
|
||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||
"bad offset value\0"
|
||||
"bad option value\0"
|
||||
/* 35 */
|
||||
/* 35 */
|
||||
"bad offset into UTF string\0"
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||
"invalid data in workspace for DFA restart\0"
|
||||
"too much recursion for DFA matching\0"
|
||||
"backreference condition or recursion test not supported for DFA matching\0"
|
||||
/* 40 */
|
||||
/* 40 */
|
||||
"item unsupported for DFA matching\0"
|
||||
"workspace size exceeded in DFA matching\0"
|
||||
"internal error - pattern overwritten?\0"
|
||||
"bad JIT option\0"
|
||||
"JIT stack limit reached\0"
|
||||
/* 45 */
|
||||
/* 45 */
|
||||
"match limit exceeded\0"
|
||||
"no more memory\0"
|
||||
"unknown or unset substring\0"
|
||||
"unknown or unset substring\0"
|
||||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
/* 50 */
|
||||
/* 50 */
|
||||
"recursion limit exceeded\0"
|
||||
"requested value is not set\0"
|
||||
;
|
||||
"requested value is not set\0"
|
||||
;
|
||||
|
||||
|
||||
/*************************************************
|
||||
* Return error message *
|
||||
*************************************************/
|
||||
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
/* This function copies an error message into a buffer whose units are of an
|
||||
appropriate width. Error numbers are positive for compile-time errors, and
|
||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||
distinct.
|
||||
|
||||
Arguments:
|
||||
enumber error number
|
||||
buffer where to put the message (zero terminated)
|
||||
size size of the buffer
|
||||
|
||||
|
||||
Returns: length of message if all is well
|
||||
negative on error
|
||||
*/
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
||||
|
@ -260,23 +260,23 @@ if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
|||
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
|
||||
n = -enumber;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NULL) {};
|
||||
if (*message == CHAR_NULL)
|
||||
{
|
||||
if (*message == CHAR_NULL)
|
||||
{
|
||||
sprintf(xbuff, "Internal error: no text for error %d", enumber);
|
||||
break;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
{
|
||||
|
@ -287,9 +287,9 @@ for (i = 0; *message != 0; i++)
|
|||
}
|
||||
buffer[i] = *message++;
|
||||
}
|
||||
|
||||
|
||||
buffer[i] = 0;
|
||||
return i;
|
||||
return i;
|
||||
}
|
||||
|
||||
/* End of pcre2_error.c */
|
||||
|
|
|
@ -1553,11 +1553,11 @@ enum {
|
|||
/* This is used to skip a subpattern with a {0} quantifier */
|
||||
|
||||
OP_SKIPZERO, /* 162 */
|
||||
|
||||
|
||||
/* This is used to identify a DEFINE group during compilation so that it can
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
be checked for having only one branch. It is changed to OP_FALSE before
|
||||
compilation finishes. */
|
||||
|
||||
|
||||
OP_DEFINE, /* 163 */
|
||||
|
||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||
|
@ -1565,7 +1565,7 @@ enum {
|
|||
some in the past. */
|
||||
|
||||
OP_TABLE_LENGTH
|
||||
|
||||
|
||||
};
|
||||
|
||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||
|
@ -1708,7 +1708,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
|||
1, 3, /* THEN, THEN_ARG */ \
|
||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||
1 /* DEFINE */
|
||||
1 /* DEFINE */
|
||||
|
||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||
|
||||
|
@ -1830,9 +1830,9 @@ extern const uint8_t PRIV(ucd_stage1)[];
|
|||
extern const uint16_t PRIV(ucd_stage2)[];
|
||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||
extern const uint32_t PRIV(ucp_gentype)[];
|
||||
#ifdef SUPPORT_JIT
|
||||
#ifdef SUPPORT_JIT
|
||||
extern const int PRIV(ucp_typerange)[];
|
||||
#endif
|
||||
#endif
|
||||
extern const char *PRIV(unicode_version);
|
||||
extern const ucp_type_table PRIV(utt)[];
|
||||
extern const char PRIV(utt_names)[];
|
||||
|
|
|
@ -39,16 +39,16 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
/* This module contains mode-dependent macro and structure definitions. The
|
||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||
These mode-dependent items are kept in a separate file so that they can also be
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
#included multiple times for different code unit widths by pcre2test in order
|
||||
to have access to the hidden structures at all supported widths.
|
||||
|
||||
Some of the mode-dependent macros are required at different widths for
|
||||
different parts of the pcre2test code (in particular, the included
|
||||
pcre_printint.c file). We undefine them here so that they can be re-defined for
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||
just to undefine them all. */
|
||||
|
||||
#undef ACROSSCHAR
|
||||
|
@ -93,7 +93,7 @@ request for an even bigger limit. For this reason, and also to make the code
|
|||
easier to maintain, the storing and loading of offsets from the compiled code
|
||||
unit string is now handled by the macros that are defined here.
|
||||
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||
values of 2 or 4 are also supported. */
|
||||
|
||||
/* ------------------- 8-bit support ------------------ */
|
||||
|
@ -173,14 +173,14 @@ values of 2 or 4 are also supported. */
|
|||
|
||||
#else
|
||||
#error Unsupported compiling mode
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
/* --------------- Other mode-specific macros ----------------- */
|
||||
|
||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||
the size of offsets changes. These are used for repeat counts and for other
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
things such as capturing parenthesis numbers in back references.
|
||||
|
||||
Define the number of code units required to hold a 16-bit count/offset, and
|
||||
macros to load and store such a value. For reasons that I do not understand,
|
||||
|
@ -196,7 +196,7 @@ arithmetic results in a signed value. Hence the cast. */
|
|||
#else /* Code units are 16 or 32 bits */
|
||||
#define IMM2_SIZE 1
|
||||
#define GET2(a,n) a[n]
|
||||
#define PUT2(a,n,d) a[n] = d
|
||||
#define PUT2(a,n,d) a[n] = d
|
||||
#endif
|
||||
|
||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||
|
@ -346,7 +346,7 @@ because almost all calls are already within a block of UTF-8 only code. */
|
|||
/* Same as above, but it allows a fully customizable form. */
|
||||
#define ACROSSCHAR(condition, eptr, action) \
|
||||
while((condition) && ((eptr) & 0xc0) == 0x80) action
|
||||
|
||||
|
||||
/* Deposit a character into memory, returning the number of code units. */
|
||||
|
||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||
|
@ -545,10 +545,10 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
|||
|
||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||
code that uses them is simpler because it assumes this. */
|
||||
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
/* The real general context structure. At present it holds only data for custom
|
||||
memory control. */
|
||||
|
||||
typedef struct pcre2_real_general_context {
|
||||
|
@ -572,9 +572,9 @@ typedef struct pcre2_real_match_context {
|
|||
pcre2_memctl memctl;
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl;
|
||||
#endif
|
||||
#endif
|
||||
int (*callout)(pcre2_callout_block *);
|
||||
void *callout_data;
|
||||
void *callout_data;
|
||||
uint32_t match_limit;
|
||||
uint32_t recursion_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
@ -584,9 +584,9 @@ typedef struct pcre2_real_match_context {
|
|||
typedef struct pcre2_real_code {
|
||||
pcre2_memctl memctl; /* Memory control fields */
|
||||
const uint8_t *tables; /* The character tables */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
void *executable_jit; /* Pointer to JIT code */
|
||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||
size_t blocksize; /* Total (bytes) that was malloc-ed */
|
||||
size_t blocksize; /* Total (bytes) that was malloc-ed */
|
||||
uint32_t magic_number; /* Paranoid and endianness check */
|
||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
|
@ -596,10 +596,10 @@ typedef struct pcre2_real_code {
|
|||
uint32_t first_codeunit; /* Starting code unit */
|
||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||
uint16_t bsr_convention; /* What \R matches */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t newline_convention; /* What is a newline? */
|
||||
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t minlength; /* Minimum length of match */
|
||||
uint16_t top_bracket; /* Highest numbered group */
|
||||
uint16_t top_backref; /* Highest numbered back reference */
|
||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||
uint16_t name_count; /* Number of name entries in the table */
|
||||
|
@ -614,10 +614,10 @@ typedef struct pcre2_real_match_data {
|
|||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
PCRE2_SIZE ovector[1]; /* The first field */
|
||||
PCRE2_SIZE ovector[1]; /* The first field */
|
||||
} pcre2_real_match_data;
|
||||
|
||||
|
||||
|
@ -700,7 +700,7 @@ the system stack. */
|
|||
typedef struct ovecsave_frame {
|
||||
struct ovecsave_frame *next; /* Next frame on free chain */
|
||||
PCRE2_SIZE saved_ovec[1]; /* First vector element */
|
||||
} ovecsave_frame;
|
||||
} ovecsave_frame;
|
||||
|
||||
/* Structure for items in a linked list that represents an explicit recursive
|
||||
call within the pattern; used by pcre_match(). */
|
||||
|
@ -738,7 +738,7 @@ typedef struct match_block {
|
|||
pcre2_memctl memctl; /* For general use */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
pcre2_memctl stack_memctl; /* For "stack" frames */
|
||||
#endif
|
||||
#endif
|
||||
uint32_t match_call_count; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_recursion; /* As it says */
|
||||
|
@ -763,7 +763,7 @@ typedef struct match_block {
|
|||
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
||||
|
@ -778,7 +778,7 @@ typedef struct match_block {
|
|||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||
recursion_info *recursive; /* Linked list of recursion data */
|
||||
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||
void *callout_data; /* To pass back to callouts */
|
||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||
#ifdef HEAP_MATCH_RECURSE
|
||||
|
@ -795,7 +795,7 @@ typedef struct dfa_match_block {
|
|||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||
PCRE2_SPTR end_subject; /* End of subject string */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
const uint8_t *tables; /* Character tables */
|
||||
PCRE2_SIZE start_offset; /* The start offset value */
|
||||
uint32_t moptions; /* Match options */
|
||||
|
|
|
@ -72,9 +72,9 @@ Arguments:
|
|||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
jit_stack points to a JIT stack
|
||||
match_data points to a match_data block
|
||||
mcontext points to a match context
|
||||
jit_stack points to a JIT stack
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
|
|
|
@ -60,9 +60,9 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
|||
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||
a pointer to them. They are built using the ctype functions, and consequently
|
||||
their contents will depend upon the current locale setting. When compiled as
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
part of the library, the store is obtained via a general context malloc, if
|
||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||
program) malloc() is used, and the function has a different name so as not to
|
||||
clash with the prototype in pcre2.h.
|
||||
|
||||
Arguments: none when DFTABLES is defined
|
||||
|
|
|
@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
|
||||
PCRE2_PARTIAL_SOFT)
|
||||
|
||||
|
||||
#define PUBLIC_JIT_MATCH_OPTIONS \
|
||||
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
|
||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
|
||||
|
@ -125,24 +125,24 @@ ovector length is always a multiple of 3. */
|
|||
/* This function is called only when it is known that the offset lies within
|
||||
the offsets that have so far been used in the match. Note that in caseless
|
||||
UTF-8 mode, the number of subject bytes matched may be different to the number
|
||||
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
|
||||
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
|
||||
seems unlikely.)
|
||||
|
||||
Arguments:
|
||||
offset index into the offset vector
|
||||
offset_top top of the used offset vector
|
||||
offset_top top of the used offset vector
|
||||
eptr pointer into the subject
|
||||
mb points to match block
|
||||
caseless TRUE if caseless
|
||||
lengthptr pointer for returning the length matched
|
||||
lengthptr pointer for returning the length matched
|
||||
|
||||
Returns: = 0 successful match; number of code units matched is set
|
||||
< 0 no match
|
||||
> 0 partial match
|
||||
> 0 partial match
|
||||
*/
|
||||
|
||||
static int
|
||||
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
||||
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
||||
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE
|
||||
|
@ -153,7 +153,7 @@ register PCRE2_SPTR p;
|
|||
PCRE2_SIZE length;
|
||||
PCRE2_SPTR eptr_start = eptr;
|
||||
|
||||
/* Deal with an unset group. The default is no match, but there is an option to
|
||||
/* Deal with an unset group. The default is no match, but there is an option to
|
||||
match an empty string. */
|
||||
|
||||
if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
|
||||
|
@ -164,7 +164,7 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
|
|||
return 0; /* Match */
|
||||
}
|
||||
else return -1; /* No match */
|
||||
}
|
||||
}
|
||||
|
||||
/* Separate the caseless and UTF cases for speed. */
|
||||
|
||||
|
@ -217,7 +217,7 @@ if (caseless)
|
|||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||
cc = UCHAR21TEST(eptr);
|
||||
cp = UCHAR21TEST(p);
|
||||
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
|
||||
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
|
||||
return -1; /* No match */
|
||||
p++;
|
||||
eptr++;
|
||||
|
@ -345,7 +345,7 @@ argument of match(), which never changes. */
|
|||
}
|
||||
|
||||
|
||||
/* Structure for remembering the local variables in a private frame. Arrange it
|
||||
/* Structure for remembering the local variables in a private frame. Arrange it
|
||||
so as to minimize the number of holes. */
|
||||
|
||||
typedef struct heapframe {
|
||||
|
@ -364,7 +364,7 @@ typedef struct heapframe {
|
|||
PCRE2_SPTR Xpp;
|
||||
PCRE2_SPTR Xprev;
|
||||
PCRE2_SPTR Xsaved_eptr;
|
||||
|
||||
|
||||
eptrblock *Xeptrb;
|
||||
|
||||
PCRE2_SIZE Xlength;
|
||||
|
@ -377,7 +377,7 @@ typedef struct heapframe {
|
|||
uint32_t Xrdepth;
|
||||
uint32_t Xop;
|
||||
uint32_t Xsave_capture_last;
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
uint32_t Xprop_value;
|
||||
int Xprop_type;
|
||||
|
@ -401,7 +401,7 @@ typedef struct heapframe {
|
|||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
PCRE2_UCHAR Xocchars[6];
|
||||
#endif
|
||||
#endif
|
||||
} heapframe;
|
||||
|
||||
#endif
|
||||
|
@ -414,9 +414,9 @@ typedef struct heapframe {
|
|||
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
|
||||
backtrack points by calling itself recursively in all but one case. The one
|
||||
special case is when processing OP_RECURSE, which specifies recursion in the
|
||||
pattern. The entire ovector must be saved and restored while processing
|
||||
OP_RECURSE. If the ovector is small enough, instead of calling match()
|
||||
directly, op_recurse_ovecsave() is called. This function uses the system stack
|
||||
pattern. The entire ovector must be saved and restored while processing
|
||||
OP_RECURSE. If the ovector is small enough, instead of calling match()
|
||||
directly, op_recurse_ovecsave() is called. This function uses the system stack
|
||||
to save the ovector while calling match() to process the pattern recursion. */
|
||||
|
||||
#ifndef HEAP_MATCH_RECURSE
|
||||
|
@ -425,7 +425,7 @@ to save the ovector while calling match() to process the pattern recursion. */
|
|||
op_recurse_ovecsave(). */
|
||||
|
||||
static int
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
|
||||
|
||||
|
||||
|
@ -433,7 +433,7 @@ match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
|||
* Process OP_RECURSE, stacking ovector *
|
||||
*************************************************/
|
||||
|
||||
/* When this function is called, mb->recursive has already been updated to
|
||||
/* When this function is called, mb->recursive has already been updated to
|
||||
point to a new recursion data block, and all its fields other than ovec_save
|
||||
have been set.
|
||||
|
||||
|
@ -447,9 +447,9 @@ Arguments:
|
|||
eptrb pointer to chain of blocks containing eptr at start of
|
||||
brackets - for testing for empty matches
|
||||
rdepth the recursion depth
|
||||
|
||||
|
||||
Returns: a match() return code
|
||||
*/
|
||||
*/
|
||||
|
||||
static int
|
||||
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
|
||||
|
@ -472,7 +472,7 @@ data and the last captured value. */
|
|||
do
|
||||
{
|
||||
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
|
||||
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
||||
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
||||
mb, eptrb, rdepth + 1);
|
||||
memcpy(mb->ovector, new_recursive->ovec_save,
|
||||
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||
|
@ -560,7 +560,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
|
|||
*/
|
||||
|
||||
static int
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
|
||||
{
|
||||
/* These variables do not need to be preserved over recursion in this function,
|
||||
|
@ -1382,10 +1382,10 @@ for (;;)
|
|||
|
||||
case OP_FALSE:
|
||||
break;
|
||||
|
||||
|
||||
case OP_TRUE:
|
||||
condition = TRUE;
|
||||
break;
|
||||
break;
|
||||
|
||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||
|
@ -1475,7 +1475,7 @@ for (;;)
|
|||
update the last used pointer. */
|
||||
|
||||
case OP_ASSERT_ACCEPT:
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
|
||||
case OP_ACCEPT:
|
||||
case OP_END:
|
||||
|
@ -1735,7 +1735,7 @@ for (;;)
|
|||
|
||||
case OP_RECURSE:
|
||||
{
|
||||
ovecsave_frame *fr;
|
||||
ovecsave_frame *fr;
|
||||
recursion_info *ri;
|
||||
uint32_t recno;
|
||||
|
||||
|
@ -1762,15 +1762,15 @@ for (;;)
|
|||
|
||||
ecode += 1 + LINK_SIZE;
|
||||
|
||||
/* When we are using the system stack for match() recursion we can call a
|
||||
function that uses the system stack for preserving the ovector while
|
||||
/* When we are using the system stack for match() recursion we can call a
|
||||
function that uses the system stack for preserving the ovector while
|
||||
processing the pattern recursion, but only if the ovector is small
|
||||
enough. */
|
||||
|
||||
|
||||
#ifndef HEAP_MATCH_RECURSE
|
||||
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
|
||||
{
|
||||
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
||||
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
||||
eptrb, rdepth);
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
|
||||
|
@ -1785,10 +1785,10 @@ for (;;)
|
|||
}
|
||||
#endif
|
||||
/* If the ovector is too big, or if we are using the heap for match()
|
||||
recursion, we have to use the heap for saving the ovector. Used ovecsave
|
||||
frames are kept on a chain and re-used. This makes a small improvement in
|
||||
recursion, we have to use the heap for saving the ovector. Used ovecsave
|
||||
frames are kept on a chain and re-used. This makes a small improvement in
|
||||
execution time on Linux. */
|
||||
|
||||
|
||||
if (mb->ovecsave_chain != NULL)
|
||||
{
|
||||
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
|
||||
|
@ -1800,17 +1800,17 @@ for (;;)
|
|||
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
|
||||
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
||||
new_recursive.ovec_save = fr->saved_ovec;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memcpy(new_recursive.ovec_save, mb->ovector,
|
||||
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||
|
||||
|
||||
/* Do the recursion. After processing each alternative, restore the
|
||||
ovector data and the last captured value. This code has the same overall
|
||||
logic as the code in the op_recurse_ovecsave() function, but is adapted
|
||||
to use RMATCH/RRETURN and to release the heap block containing the saved
|
||||
ovector. */
|
||||
|
||||
|
||||
cbegroup = (*callpat >= OP_SBRA);
|
||||
do
|
||||
{
|
||||
|
@ -1821,51 +1821,51 @@ for (;;)
|
|||
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||
mb->capture_last = new_recursive.saved_capture_last;
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
|
||||
|
||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
||||
{
|
||||
fr = (ovecsave_frame *)
|
||||
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||
fr->next = mb->ovecsave_chain;
|
||||
mb->ovecsave_chain = fr;
|
||||
|
||||
mb->ovecsave_chain = fr;
|
||||
|
||||
/* Set where we got to in the subject, and reset the start, in case
|
||||
it was changed by \K. This *is* propagated back out of a recursion,
|
||||
for Perl compatibility. */
|
||||
|
||||
|
||||
eptr = mb->end_match_ptr;
|
||||
mstart = mb->start_match_ptr;
|
||||
goto RECURSION_MATCHED; /* Exit loop; end processing */
|
||||
}
|
||||
|
||||
|
||||
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
|
||||
recursion; they cause a NOMATCH for the entire recursion. These codes
|
||||
are defined in a range that can be tested for. */
|
||||
|
||||
|
||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||
{
|
||||
{
|
||||
rrc = MATCH_NOMATCH;
|
||||
goto RECURSION_RETURN;
|
||||
goto RECURSION_RETURN;
|
||||
}
|
||||
|
||||
|
||||
/* Any return code other than NOMATCH is an error. */
|
||||
|
||||
|
||||
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
|
||||
mb->recursive = &new_recursive;
|
||||
callpat += GET(callpat, 1);
|
||||
}
|
||||
while (*callpat == OP_ALT);
|
||||
|
||||
|
||||
RECURSION_RETURN:
|
||||
mb->recursive = new_recursive.prevrec;
|
||||
fr = (ovecsave_frame *)
|
||||
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||
fr->next = mb->ovecsave_chain;
|
||||
mb->ovecsave_chain = fr;
|
||||
mb->ovecsave_chain = fr;
|
||||
RRETURN(rrc);
|
||||
}
|
||||
|
||||
RECURSION_MATCHED:
|
||||
|
||||
RECURSION_MATCHED:
|
||||
break;
|
||||
|
||||
/* An alternation is the end of a branch; scan along to find the end of the
|
||||
|
@ -1942,7 +1942,7 @@ for (;;)
|
|||
mb->end_match_ptr = eptr; /* For ONCE_NC */
|
||||
mb->end_offset_top = offset_top;
|
||||
mb->start_match_ptr = mstart;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
RRETURN(MATCH_MATCH); /* Sets mb->mark */
|
||||
}
|
||||
|
||||
|
@ -1966,7 +1966,7 @@ for (;;)
|
|||
{
|
||||
mb->end_match_ptr = eptr;
|
||||
mb->start_match_ptr = mstart;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
RRETURN(MATCH_MATCH);
|
||||
}
|
||||
|
||||
|
@ -2010,7 +2010,7 @@ for (;;)
|
|||
mb->start_match_ptr = mstart; /* In case \K reset it */
|
||||
mb->end_match_ptr = eptr;
|
||||
mb->end_offset_top = offset_top;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||
RRETURN(MATCH_KETRPOS);
|
||||
}
|
||||
|
||||
|
@ -2230,8 +2230,8 @@ for (;;)
|
|||
else
|
||||
{
|
||||
PCRE2_SPTR nextptr = eptr + 1;
|
||||
FORWARDCHARTEST(nextptr, mb->end_subject);
|
||||
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
|
||||
FORWARDCHARTEST(nextptr, mb->end_subject);
|
||||
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
|
||||
GETCHAR(c, eptr);
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
|
@ -2282,7 +2282,7 @@ for (;;)
|
|||
}
|
||||
else
|
||||
{
|
||||
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
||||
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||
{
|
||||
|
@ -2297,7 +2297,7 @@ for (;;)
|
|||
#endif
|
||||
cur_is_word = MAX_255(*eptr)
|
||||
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Now see if the situation is what we want */
|
||||
|
@ -2689,7 +2689,7 @@ for (;;)
|
|||
|
||||
/* Match a back reference, possibly repeatedly. Look past the end of the
|
||||
item to see if there is repeat information following.
|
||||
|
||||
|
||||
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
|
||||
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
|
||||
and OP_DNREFI are used. In this case we must scan the list of groups to
|
||||
|
@ -2705,7 +2705,7 @@ for (;;)
|
|||
|
||||
/* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
|
||||
code. */
|
||||
|
||||
|
||||
offset = 0;
|
||||
while (count-- > 0)
|
||||
{
|
||||
|
@ -2721,7 +2721,7 @@ for (;;)
|
|||
caseless = op == OP_REFI;
|
||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
|
||||
|
||||
/* Set up for repetition, or handle the non-repeated case */
|
||||
|
||||
REF_REPEAT:
|
||||
|
@ -2750,7 +2750,7 @@ for (;;)
|
|||
break;
|
||||
|
||||
default: /* No repeat follows */
|
||||
{
|
||||
{
|
||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
|
||||
if (rc != 0)
|
||||
{
|
||||
|
@ -2758,7 +2758,7 @@ for (;;)
|
|||
CHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
}
|
||||
}
|
||||
eptr += length;
|
||||
continue; /* With the main loop */
|
||||
}
|
||||
|
@ -2769,16 +2769,16 @@ for (;;)
|
|||
also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
|
||||
group behave as a zero-length group. For any other unset cases, carrying
|
||||
on will result in NOMATCH. */
|
||||
|
||||
|
||||
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
|
||||
{
|
||||
{
|
||||
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
|
||||
}
|
||||
else /* Group is not set */
|
||||
{
|
||||
if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* First, ensure the minimum number of matches are present. We get back
|
||||
the length of the reference string explicitly rather than passing the
|
||||
|
@ -2787,7 +2787,7 @@ for (;;)
|
|||
for (i = 1; i <= min; i++)
|
||||
{
|
||||
PCRE2_SIZE slength;
|
||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||
if (rc != 0)
|
||||
{
|
||||
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
||||
|
@ -2808,13 +2808,13 @@ for (;;)
|
|||
{
|
||||
for (fi = min;; fi++)
|
||||
{
|
||||
int rc;
|
||||
int rc;
|
||||
PCRE2_SIZE slength;
|
||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
||||
rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||
if (rc != 0)
|
||||
if (rc != 0)
|
||||
{
|
||||
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
||||
CHECK_PARTIAL();
|
||||
|
@ -2825,12 +2825,12 @@ for (;;)
|
|||
/* Control never gets here */
|
||||
}
|
||||
|
||||
/* If maximizing, find the longest string and work backwards, as long as
|
||||
/* If maximizing, find the longest string and work backwards, as long as
|
||||
the matched lengths for each iteration are the same. */
|
||||
|
||||
else
|
||||
{
|
||||
BOOL samelengths = TRUE;
|
||||
BOOL samelengths = TRUE;
|
||||
pp = eptr;
|
||||
length = mb->ovector[offset+1] - mb->ovector[offset];
|
||||
|
||||
|
@ -2839,7 +2839,7 @@ for (;;)
|
|||
PCRE2_SIZE slength;
|
||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||
|
||||
if (rc != 0)
|
||||
if (rc != 0)
|
||||
{
|
||||
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
|
||||
the soft partial matching case. */
|
||||
|
@ -2857,14 +2857,14 @@ for (;;)
|
|||
eptr += slength;
|
||||
}
|
||||
|
||||
/* If the length matched for each repetition is the same as the length of
|
||||
the captured group, we can easily work backwards. This is the normal
|
||||
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
|
||||
/* If the length matched for each repetition is the same as the length of
|
||||
the captured group, we can easily work backwards. This is the normal
|
||||
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
|
||||
characters whose lengths (in terms of code units) differ. However, this
|
||||
is very rare, so we handle it by re-matching fewer and fewer times. */
|
||||
|
||||
|
||||
if (samelengths)
|
||||
{
|
||||
{
|
||||
while (eptr >= pp)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
|
||||
|
@ -2872,20 +2872,20 @@ for (;;)
|
|||
eptr -= length;
|
||||
}
|
||||
}
|
||||
|
||||
/* The rare case of non-matching lengths. Re-scan the repetition for each
|
||||
|
||||
/* The rare case of non-matching lengths. Re-scan the repetition for each
|
||||
iteration. We know that match_ref() will succeed every time. */
|
||||
|
||||
|
||||
else
|
||||
{
|
||||
max = i;
|
||||
max = i;
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (eptr == pp) break; /* Failed after minimal repetition */
|
||||
eptr = pp;
|
||||
max--;
|
||||
max--;
|
||||
for (i = min; i < max; i++)
|
||||
{
|
||||
PCRE2_SIZE slength;
|
||||
|
@ -2893,8 +2893,8 @@ for (;;)
|
|||
eptr += slength;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
@ -6417,20 +6417,20 @@ with different endianness. */
|
|||
|
||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||
return PCRE2_ERROR_BADMODE;
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
|
||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||
options variable for this function. Users of PCRE2 who are not calling the
|
||||
function directly would like to have a way of setting these flags, in the same
|
||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||
transferred to the options for this function. The bits are guaranteed to be
|
||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||
that the match-time bits are not more significant than the flag bits. If by
|
||||
accident this is not the case, a compile-time division by zero error will
|
||||
occur. */
|
||||
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||
#undef FF
|
||||
|
@ -6541,7 +6541,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
|||
mcontext->match_limit : re->limit_match;
|
||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||
mcontext->recursion_limit : re->limit_recursion;
|
||||
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
mb->lcc = re->tables + lcc_offset;
|
||||
|
@ -6580,7 +6580,7 @@ switch(re->newline_convention)
|
|||
|
||||
default: return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
|
||||
|
||||
/* If the expression has got more back references than the offsets supplied can
|
||||
hold, we get a temporary chunk of memory to use during the matching. Otherwise,
|
||||
we can use the vector supplied. The size of the ovector is three times the
|
||||
|
@ -6854,7 +6854,7 @@ for(;;)
|
|||
|
||||
mb->start_match_ptr = start_match;
|
||||
mb->start_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
mb->last_used_ptr = start_match;
|
||||
mb->match_call_count = 0;
|
||||
mb->match_function_type = 0;
|
||||
mb->end_offset_top = 0;
|
||||
|
@ -6990,7 +6990,7 @@ while (mb->ovecsave_chain != NULL)
|
|||
ovecsave_frame *this = mb->ovecsave_chain;
|
||||
mb->ovecsave_chain = this->next;
|
||||
mb->memctl.free(this, mb->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fill in fields that are always returned in the match data. */
|
||||
|
||||
|
@ -7057,9 +7057,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
|||
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
|
||||
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
|
||||
}
|
||||
|
||||
|
||||
/* Set the remaining returned values */
|
||||
|
||||
|
||||
match_data->startchar = start_match - subject;
|
||||
match_data->leftchar = mb->start_used_ptr - subject;
|
||||
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
|
||||
|
@ -7068,7 +7068,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
|||
}
|
||||
|
||||
/* Control gets here if there has been a partial match, an error, or if the
|
||||
overall match attempt has failed at all permitted starting positions. Any mark
|
||||
overall match attempt has failed at all permitted starting positions. Any mark
|
||||
data is in the nomatch_mark field. */
|
||||
|
||||
match_data->mark = mb->nomatch_mark;
|
||||
|
|
|
@ -72,10 +72,10 @@ return yield;
|
|||
*************************************************/
|
||||
|
||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
||||
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
||||
pcre2_general_context *gcontext)
|
||||
{
|
||||
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
||||
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
||||
gcontext);
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,7 @@ return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
|||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
{
|
||||
if (match_data != NULL)
|
||||
if (match_data != NULL)
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
|
||||
|
|
|
@ -60,9 +60,9 @@ http://unicode.org/unicode/reports/tr18/. */
|
|||
* Check for newline at given position *
|
||||
*************************************************/
|
||||
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||
pointed to by ptr is less than the end of the string.
|
||||
|
||||
Arguments:
|
||||
|
@ -76,7 +76,7 @@ Returns: TRUE or FALSE
|
|||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
@ -90,15 +90,15 @@ c = *ptr;
|
|||
|
||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||
{
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
case CHAR_LF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -111,8 +111,8 @@ else switch(c)
|
|||
#endif
|
||||
case CHAR_LF:
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
case CHAR_FF:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
|
@ -121,25 +121,25 @@ else switch(c)
|
|||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ Returns: TRUE or FALSE
|
|||
*/
|
||||
|
||||
BOOL
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||
uint32_t *lenptr, BOOL utf)
|
||||
{
|
||||
uint32_t c;
|
||||
|
@ -190,11 +190,11 @@ if (type == NLTYPE_ANYCRLF) switch(c)
|
|||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
default:
|
||||
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
|
@ -211,31 +211,31 @@ else switch(c)
|
|||
#endif
|
||||
case CHAR_VT:
|
||||
case CHAR_FF:
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
case CHAR_CR:
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
|
||||
#ifndef EBCDIC
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
case CHAR_NEL:
|
||||
*lenptr = utf? 2 : 1;
|
||||
return TRUE;
|
||||
|
||||
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 3;
|
||||
*lenptr = 3;
|
||||
return TRUE;
|
||||
|
||||
|
||||
#else /* 16-bit or 32-bit code units */
|
||||
case CHAR_NEL:
|
||||
case 0x2028: /* LS */
|
||||
case 0x2029: /* PS */
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
*lenptr = 1;
|
||||
return TRUE;
|
||||
#endif
|
||||
#endif /* Not EBCDIC */
|
||||
|
||||
default:
|
||||
default:
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,17 +89,17 @@ if (where == NULL) /* Requests field length */
|
|||
case PCRE2_INFO_NAMECOUNT:
|
||||
case PCRE2_INFO_NEWLINE:
|
||||
case PCRE2_INFO_RECURSIONLIMIT:
|
||||
return sizeof(uint32_t);
|
||||
return sizeof(uint32_t);
|
||||
|
||||
case PCRE2_INFO_FIRSTBITMAP:
|
||||
return sizeof(const uint8_t *);
|
||||
|
||||
case PCRE2_INFO_JITSIZE:
|
||||
case PCRE2_INFO_SIZE:
|
||||
return sizeof(size_t);
|
||||
return sizeof(size_t);
|
||||
|
||||
case PCRE2_INFO_NAMETABLE:
|
||||
return sizeof(PCRE2_SPTR);
|
||||
return sizeof(PCRE2_SPTR);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/* This module contains a PCRE private debugging function for printing out the
|
||||
internal form of a compiled regular expression, along with some supporting
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
local functions. This source file is #included in pcre2test.c at each supported
|
||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||
that comprise the library. */
|
||||
|
||||
|
||||
|
@ -82,9 +82,9 @@ Arguments:
|
|||
f file to write to
|
||||
ptr pointer to first code unit of the character
|
||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||
|
||||
|
||||
Returns: number of additional code units used
|
||||
*/
|
||||
*/
|
||||
|
||||
static unsigned int
|
||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||
|
@ -105,7 +105,7 @@ if (utf)
|
|||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||
#endif /* CODE_UNIT_WIDTH */
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Handle a valid one-code-unit character at any width. */
|
||||
|
||||
|
@ -115,10 +115,10 @@ if (one_code_unit)
|
|||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||
else fprintf(f, "\\x{%02x}", c);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||
for each width. If UTF is not supported, control should never get here, but we
|
||||
need a return statement to keep the compiler happy. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
|
@ -134,10 +134,10 @@ if ((c & 0xc0) != 0xc0)
|
|||
{
|
||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
int i;
|
||||
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||
int s = 6*a;
|
||||
c = (c & utf8_table3[a]) << s;
|
||||
|
@ -153,7 +153,7 @@ else
|
|||
}
|
||||
fprintf(f, "\\x{%x}", c);
|
||||
return a;
|
||||
}
|
||||
}
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||
|
||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||
|
@ -173,7 +173,7 @@ return 1;
|
|||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||
as an indication. */
|
||||
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
fprintf(f, "\\X{%x}", c);
|
||||
return 0;
|
||||
|
@ -187,15 +187,15 @@ return 0;
|
|||
* Print string as a list of code units *
|
||||
*************************************************/
|
||||
|
||||
/* This takes no account of UTF as it always prints each individual code unit.
|
||||
/* This takes no account of UTF as it always prints each individual code unit.
|
||||
The string is zero-terminated.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
ptr point to the string
|
||||
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
*/
|
||||
|
||||
static void
|
||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||
|
@ -213,9 +213,9 @@ while (*ptr != '\0')
|
|||
* Find Unicode property name *
|
||||
*************************************************/
|
||||
|
||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||
function should not be called in such configurations, because a pattern that
|
||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||
into the main code, however, we just put one into this function. */
|
||||
|
||||
static const char *
|
||||
|
@ -244,15 +244,15 @@ return "??";
|
|||
|
||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||
pseudo-property that contains a pointer to a list of case-equivalent
|
||||
characters.
|
||||
characters.
|
||||
|
||||
Arguments:
|
||||
f file to write to
|
||||
code pointer in the compiled code
|
||||
before text to print before
|
||||
after text to print after
|
||||
|
||||
Returns: nothing
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
|
@ -281,14 +281,14 @@ else
|
|||
|
||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||
printed. Lengths can be turned off from pcre2test so that automatic tests on
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||
|
||||
Arguments:
|
||||
re a compiled pattern
|
||||
f the file to write to
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
print_lengths show various lengths
|
||||
|
||||
Returns: nothing
|
||||
*/
|
||||
|
||||
static void
|
||||
|
@ -460,7 +460,7 @@ for(;;)
|
|||
case OP_TYPEMINQUERY:
|
||||
case OP_TYPEPOSQUERY:
|
||||
fprintf(f, " %s ", flag);
|
||||
|
||||
|
||||
if (*code >= OP_TYPESTAR)
|
||||
{
|
||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||
|
|
|
@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
*/
|
||||
|
||||
/* This module contains internal functions for comparing and finding the length
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
of strings. These are used instead of strcmp() etc because the standard
|
||||
functions work only on 8-bit data. */
|
||||
|
||||
|
||||
|
@ -54,7 +54,7 @@ functions work only on 8-bit data. */
|
|||
* Compare two zero-terminated PCRE2 strings *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
@ -80,7 +80,7 @@ return 0;
|
|||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
|
@ -108,7 +108,7 @@ return 0;
|
|||
* Compare two PCRE2 strings, given a length *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
/*
|
||||
Arguments:
|
||||
str1 first string
|
||||
str2 second string
|
||||
|
@ -135,7 +135,7 @@ return 0;
|
|||
* Compare PCRE2 string to 8-bit string by length *
|
||||
*************************************************/
|
||||
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||
const char *.
|
||||
|
||||
Arguments:
|
||||
|
@ -164,7 +164,7 @@ return 0;
|
|||
* Find the length of a PCRE2 string *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
/*
|
||||
Argument: the string
|
||||
Returns: the length
|
||||
*/
|
||||
|
@ -185,9 +185,9 @@ return c;
|
|||
/* Arguments:
|
||||
str1 buffer to receive the string
|
||||
str2 8-bit string to be copied
|
||||
|
||||
|
||||
Returns: the number of code units used (excluding trailing zero)
|
||||
*/
|
||||
*/
|
||||
|
||||
int
|
||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||
|
|
|
@ -74,7 +74,7 @@ Arguments:
|
|||
code pointer to start of group (the bracket)
|
||||
startcode pointer to start of the whole pattern's code
|
||||
recurse_depth RECURSE depth
|
||||
utf UTF flag
|
||||
utf UTF flag
|
||||
|
||||
Returns: the minimum length
|
||||
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
||||
|
@ -388,10 +388,10 @@ for (;;)
|
|||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
PCRE2_UCHAR *slot =
|
||||
PCRE2_UCHAR *slot =
|
||||
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
GET2(cc, 1) * re->name_entry_size;
|
||||
|
||||
GET2(cc, 1) * re->name_entry_size;
|
||||
|
||||
d = INT_MAX;
|
||||
while (count-- > 0)
|
||||
{
|
||||
|
@ -579,7 +579,7 @@ for (;;)
|
|||
*************************************************/
|
||||
|
||||
/* Given a character, set its first code unit's bit in the table, and also the
|
||||
corresponding bit for the other version of a letter if we are caseless.
|
||||
corresponding bit for the other version of a letter if we are caseless.
|
||||
|
||||
Arguments:
|
||||
re points to the regex block
|
||||
|
@ -590,20 +590,20 @@ Arguments:
|
|||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
(void)utf; /* Stop compiler warning when UTF not supported */
|
||||
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
0xff. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
if (c > 0xff) SET_BIT(0xff); else
|
||||
if (c > 0xff) SET_BIT(0xff); else
|
||||
#endif
|
||||
|
||||
SET_BIT(c);
|
||||
SET_BIT(c);
|
||||
|
||||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||
the end of the character, even when caseless. */
|
||||
|
@ -617,7 +617,7 @@ if (utf)
|
|||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||
#endif
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* If caseless, handle the other case of the character. */
|
||||
|
||||
|
@ -669,7 +669,7 @@ static void
|
|||
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
register uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit == 32) return;
|
||||
|
@ -710,7 +710,7 @@ static void
|
|||
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||
{
|
||||
register uint32_t c;
|
||||
for (c = 0; c < table_limit; c++)
|
||||
for (c = 0; c < table_limit; c++)
|
||||
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||
|
@ -724,10 +724,10 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
|||
*************************************************/
|
||||
|
||||
/* This function scans a compiled unanchored expression recursively and
|
||||
attempts to build a bitmap of the set of possible starting code units whose
|
||||
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
||||
attempts to build a bitmap of the set of possible starting code units whose
|
||||
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
||||
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
|
||||
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
||||
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
||||
those functions for the reason.)
|
||||
|
||||
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
||||
|
@ -769,8 +769,8 @@ do
|
|||
while (try_next) /* Loop for items in this branch */
|
||||
{
|
||||
int rc;
|
||||
uint8_t *classmap = NULL;
|
||||
|
||||
uint8_t *classmap = NULL;
|
||||
|
||||
switch(*tcode)
|
||||
{
|
||||
/* If we reach something we don't understand, it means a new opcode has
|
||||
|
@ -854,31 +854,31 @@ do
|
|||
case OP_THEN:
|
||||
case OP_THEN_ARG:
|
||||
return SSB_FAIL;
|
||||
|
||||
|
||||
/* A "real" property test implies no starting bits, but the fake property
|
||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||
are used for characters with more than one "other case", so there is no
|
||||
point in recognizing them for OP_NOTPROP. */
|
||||
|
||||
|
||||
case OP_PROP:
|
||||
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||
{
|
||||
{
|
||||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||
while ((c = *p++) < NOTACHAR)
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
c = buff[0];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
try_next = FALSE;
|
||||
break;
|
||||
break;
|
||||
|
||||
/* We can ignore word boundary tests. */
|
||||
|
||||
|
@ -1032,14 +1032,14 @@ do
|
|||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
@ -1052,7 +1052,7 @@ do
|
|||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
|
@ -1060,7 +1060,7 @@ do
|
|||
#endif /* Not EBCDIC */
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
|
@ -1071,16 +1071,16 @@ do
|
|||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
|
@ -1093,8 +1093,8 @@ do
|
|||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
|
||||
#endif /* 8-bit support */
|
||||
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
|
@ -1166,7 +1166,7 @@ do
|
|||
case OP_ANY:
|
||||
case OP_ALLANY:
|
||||
return SSB_FAIL;
|
||||
|
||||
|
||||
case OP_HSPACE:
|
||||
SET_BIT(CHAR_HT);
|
||||
SET_BIT(CHAR_SPACE);
|
||||
|
@ -1178,7 +1178,7 @@ do
|
|||
SET_BIT(0xA0);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of horizontal space characters. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
|
@ -1191,7 +1191,7 @@ do
|
|||
}
|
||||
else
|
||||
#endif
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||
the code is EBCDIC. */
|
||||
{
|
||||
#ifndef EBCDIC
|
||||
|
@ -1208,16 +1208,16 @@ do
|
|||
SET_BIT(CHAR_FF);
|
||||
SET_BIT(CHAR_CR);
|
||||
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
SET_BIT(CHAR_NEL);
|
||||
SET_BIT(0xFF);
|
||||
#else
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||
units of vertical space characters. */
|
||||
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
{
|
||||
|
@ -1230,7 +1230,7 @@ do
|
|||
{
|
||||
SET_BIT(CHAR_NEL);
|
||||
}
|
||||
#endif /* 8-bit support */
|
||||
#endif /* 8-bit support */
|
||||
break;
|
||||
|
||||
case OP_NOT_DIGIT:
|
||||
|
@ -1260,8 +1260,8 @@ do
|
|||
|
||||
tcode += 2;
|
||||
break;
|
||||
|
||||
/* Extended class: if there are any property checks, or if this is a
|
||||
|
||||
/* Extended class: if there are any property checks, or if this is a
|
||||
negative XCLASS without a map, give up. If there are no property checks,
|
||||
there must be wide characters on the XCLASS list, because otherwise an
|
||||
XCLASS would not have been created. This means that code points >= 255
|
||||
|
@ -1270,19 +1270,19 @@ do
|
|||
#ifdef SUPPORT_WIDE_CHARS
|
||||
case OP_XCLASS:
|
||||
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
|
||||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||
return SSB_FAIL;
|
||||
|
||||
|
||||
/* We have a positive XCLASS or a negative one without a map. Set up the
|
||||
map pointer if there is one, and fall through. */
|
||||
|
||||
|
||||
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
|
||||
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
||||
#endif
|
||||
|
||||
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
||||
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
|
||||
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
||||
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
||||
there is no difference between CLASS and NCLASS. In all other wide
|
||||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||
|
||||
|
@ -1298,26 +1298,26 @@ do
|
|||
#endif
|
||||
/* Fall through */
|
||||
|
||||
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
||||
an XCLASS, classmap will already be set; just advance the code pointer.
|
||||
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
||||
an XCLASS, classmap will already be set; just advance the code pointer.
|
||||
Otherwise, set up classmap for a a non-XCLASS and advance past it. */
|
||||
|
||||
|
||||
case OP_CLASS:
|
||||
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
|
||||
{
|
||||
{
|
||||
classmap = (uint8_t *)(++tcode);
|
||||
tcode += 32 / sizeof(PCRE2_UCHAR);
|
||||
}
|
||||
|
||||
|
||||
/* When wide characters are supported, classmap may be NULL. In UTF-8
|
||||
(sic) mode, the bits in a class bit map correspond to character values,
|
||||
not to byte values. However, the bit map we are constructing is for byte
|
||||
values. So we have to do a conversion for characters whose code point is
|
||||
values. So we have to do a conversion for characters whose code point is
|
||||
greater than 127. In fact, there are only two possible starting bytes for
|
||||
characters in the range 128 - 255. */
|
||||
|
||||
|
||||
if (classmap != NULL)
|
||||
{
|
||||
{
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf)
|
||||
{
|
||||
|
@ -1334,11 +1334,11 @@ do
|
|||
}
|
||||
else
|
||||
#endif
|
||||
/* In all modes except UTF-8, the two bit maps are compatible. */
|
||||
|
||||
/* In all modes except UTF-8, the two bit maps are compatible. */
|
||||
|
||||
{
|
||||
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Act on what follows the class. For a zero minimum repeat, continue;
|
||||
|
@ -1384,13 +1384,13 @@ return yield;
|
|||
*************************************************/
|
||||
|
||||
/* This function is handed a compiled expression that it must study to produce
|
||||
information that will speed up the matching.
|
||||
information that will speed up the matching.
|
||||
|
||||
Argument: points to the compiled expression
|
||||
Returns: 0 normally; non-zero should never normally occur
|
||||
1 unknown opcode in set_start_bits
|
||||
2 missing capturing bracket
|
||||
3 unknown opcode in find_minlength
|
||||
3 unknown opcode in find_minlength
|
||||
*/
|
||||
|
||||
int
|
||||
|
@ -1402,7 +1402,7 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
|||
|
||||
/* Find start of compiled code */
|
||||
|
||||
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||
re->name_entry_size * re->name_count;
|
||||
|
||||
/* For an anchored pattern, or an unanchored pattern that has a first code
|
||||
|
@ -1422,17 +1422,17 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
|||
switch(min = find_minlength(re, code, code, 0, utf))
|
||||
{
|
||||
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */
|
||||
break;
|
||||
|
||||
break;
|
||||
|
||||
case -2:
|
||||
return 2; /* missing capturing bracket */
|
||||
|
||||
|
||||
case -3:
|
||||
return 3; /* unrecognized opcode */
|
||||
|
||||
|
||||
default:
|
||||
re->minlength = min;
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -81,7 +81,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
{
|
||||
uint16_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
}
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
@ -108,7 +108,7 @@ Returns: if successful: 0
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
|
@ -119,7 +119,7 @@ if (stringnumber >= match_data->oveccount ||
|
|||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
while (left < right) buffer[p++] = subject[left++];
|
||||
buffer[p] = 0;
|
||||
*sizeptr = p;
|
||||
|
@ -140,7 +140,7 @@ Arguments:
|
|||
match_data pointer to match_data
|
||||
stringname the name of the required substring
|
||||
stringptr where to put the pointer to the new memory
|
||||
sizeptr where to put the length of the substring
|
||||
sizeptr where to put the length of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
|
@ -162,7 +162,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
{
|
||||
uint16_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
}
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ Arguments:
|
|||
match_data points to match data
|
||||
stringnumber the number of the required substring
|
||||
stringptr where to put a pointer to the new memory
|
||||
sizeptr where to put the size of the substring
|
||||
sizeptr where to put the size of the substring
|
||||
|
||||
Returns: if successful: zero
|
||||
if not successful a negative error code:
|
||||
|
@ -189,7 +189,7 @@ Returns: if successful: zero
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||
{
|
||||
PCRE2_SIZE left, right;
|
||||
|
@ -204,8 +204,8 @@ if (stringnumber >= match_data->oveccount ||
|
|||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
|
||||
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (block == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl));
|
||||
|
@ -222,7 +222,7 @@ return 0;
|
|||
* Free memory obtained by get_substring *
|
||||
*************************************************/
|
||||
|
||||
/*
|
||||
/*
|
||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||
Returns: nothing
|
||||
*/
|
||||
|
@ -246,7 +246,7 @@ permits duplicate names, the first substring that is set is chosen.
|
|||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringname the name of the required substring
|
||||
sizeptr where to put the length
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
@ -265,7 +265,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
{
|
||||
uint16_t n = GET2(entry, 0);
|
||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
}
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
}
|
||||
|
@ -281,7 +281,7 @@ return PCRE2_ERROR_NOSUBSTRING;
|
|||
Arguments:
|
||||
match_data pointer to match data
|
||||
stringnumber the number of the required substring
|
||||
sizeptr where to put the length
|
||||
sizeptr where to put the length
|
||||
|
||||
Returns: 0 if successful, else a negative error number
|
||||
*/
|
||||
|
@ -296,7 +296,7 @@ if (stringnumber >= match_data->oveccount ||
|
|||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
*sizeptr = match_data->ovector[stringnumber*2 + 1] -
|
||||
match_data->ovector[stringnumber*2];
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -307,23 +307,23 @@ return 0;
|
|||
|
||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
The substrings are zero-terminated, but also, if the final argument is
|
||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||
handled.
|
||||
|
||||
Arguments:
|
||||
match_data points to the match data
|
||||
listptr set to point to the list of pointers
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
lengthsptr set to point to the list of lengths (may be NULL)
|
||||
|
||||
Returns: if successful: 0
|
||||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||
or a match failure code
|
||||
or a match failure code
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||
PCRE2_SIZE **lengthsptr)
|
||||
{
|
||||
int i, count, count2;
|
||||
|
@ -343,22 +343,22 @@ if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
|||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1);
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||
|
||||
|
||||
if (lengthsptr == NULL)
|
||||
{
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
sp = (PCRE2_UCHAR *)lensp;
|
||||
lensp = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
{
|
||||
*lengthsptr = lensp;
|
||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count2; i += 2)
|
||||
{
|
||||
|
@ -398,9 +398,9 @@ memctl->free(memctl, memctl->memory_data);
|
|||
* Find (multiple) entries for named string *
|
||||
*************************************************/
|
||||
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a group with the given name. If duplicate names are
|
||||
/* This function scans the nametable for a given name, using binary chop. It
|
||||
returns either two pointers to the entries in the table, or, if no pointers are
|
||||
given, the number of a group with the given name. If duplicate names are
|
||||
permitted, this may not be unique.
|
||||
|
||||
Arguments:
|
||||
|
@ -428,11 +428,11 @@ while (top > bot)
|
|||
uint16_t mid = (top + bot) / 2;
|
||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||
if (c == 0)
|
||||
if (c == 0)
|
||||
{
|
||||
PCRE2_SPTR first;
|
||||
PCRE2_SPTR last;
|
||||
PCRE2_SPTR lastentry;
|
||||
PCRE2_SPTR lastentry;
|
||||
if (firstptr == NULL) return GET2(entry, 0);
|
||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||
first = last = entry;
|
||||
|
@ -447,7 +447,7 @@ while (top > bot)
|
|||
last += entrysize;
|
||||
}
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
*lastptr = last;
|
||||
return entrysize;
|
||||
}
|
||||
if (c > 0) bot = mid + 1; else top = mid;
|
||||
|
@ -462,7 +462,7 @@ return PCRE2_ERROR_NOSUBSTRING;
|
|||
*************************************************/
|
||||
|
||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||
when it is known that names are unique. If there are duplicate names, it is not
|
||||
when it is known that names are unique. If there are duplicate names, it is not
|
||||
defined which number is returned.
|
||||
|
||||
Arguments:
|
||||
|
@ -474,7 +474,7 @@ Returns: the number of the named parenthesis, or a negative number
|
|||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||
PCRE2_SPTR stringname)
|
||||
{
|
||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||
|
|
|
@ -232,7 +232,7 @@ enum {
|
|||
ucp_Takri,
|
||||
/* New for Unicode 7.0.0: */
|
||||
ucp_Bassa_Vah,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Caucasian_Albanian,
|
||||
ucp_Duployan,
|
||||
ucp_Elbasan,
|
||||
ucp_Grantha,
|
||||
|
|
|
@ -154,11 +154,11 @@ for (p = string; length-- > 0; p++)
|
|||
*erroroffset = (int)(p - string);
|
||||
switch(ab - length)
|
||||
{
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||
}
|
||||
}
|
||||
length -= ab; /* Length remaining */
|
||||
|
@ -314,7 +314,7 @@ return 0;
|
|||
|
||||
/* ----------------- Check a UTF-16 string ----------------- */
|
||||
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||
|
||||
/* There's not so much work, nor so many errors, for UTF-16.
|
||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||
|
|
|
@ -60,7 +60,7 @@ might contain codepoints above 255 and/or Unicode properties.
|
|||
Arguments:
|
||||
c the character
|
||||
data points to the flag code unit of the XCLASS data
|
||||
utf TRUE if in UTF mode
|
||||
utf TRUE if in UTF mode
|
||||
|
||||
Returns: TRUE if character matches, else FALSE
|
||||
*/
|
||||
|
@ -261,7 +261,7 @@ while ((t = *data++) != XCL_END)
|
|||
data += 2;
|
||||
}
|
||||
#else
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
(void)utf; /* Avoid compiler warning */
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
|
||||
|
|
|
@ -8,7 +8,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
|||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||
incompatible with the original PCRE API.
|
||||
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
There are actually three libraries, each supporting a different code unit
|
||||
width. This demonstration program uses the 8-bit library.
|
||||
|
||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||
|
@ -39,8 +39,8 @@ the following line. */
|
|||
|
||||
/* #define PCRE2_STATIC */
|
||||
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||
only one code unit width, it makes it possible to use generic function names
|
||||
such as pcre2_compile(). */
|
||||
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
|
@ -124,7 +124,7 @@ subject_length = strlen((char *)subject);
|
|||
|
||||
re = pcre2_compile(
|
||||
pattern, /* the pattern */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||
0, /* default options */
|
||||
&errornumber, /* for error number */
|
||||
&erroroffset, /* for error offset */
|
||||
|
@ -134,9 +134,9 @@ re = pcre2_compile(
|
|||
|
||||
if (re == NULL)
|
||||
{
|
||||
PCRE2_UCHAR buffer[256];
|
||||
PCRE2_UCHAR buffer[256];
|
||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||
buffer);
|
||||
return 1;
|
||||
}
|
||||
|
@ -180,7 +180,7 @@ if (rc < 0)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Match succeded. Get a pointer to the output vector, where string offsets are
|
||||
/* Match succeded. Get a pointer to the output vector, where string offsets are
|
||||
stored. */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
|
@ -193,7 +193,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
|||
* captured. *
|
||||
*************************************************************************/
|
||||
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
/* The output vector wasn't big enough. This should not happen, because we used
|
||||
pcre2_match_data_create_from_pattern() above. */
|
||||
|
||||
if (rc == 0)
|
||||
|
@ -244,7 +244,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
|||
&name_entry_size); /* where to put the answer */
|
||||
|
||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
and the substring itself. In the 8-bit library the number is held in two
|
||||
bytes, most significant first. */
|
||||
|
||||
tabptr = name_table;
|
||||
|
@ -289,7 +289,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
|||
|
||||
if (!find_all) /* Check for -g */
|
||||
{
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||
return 0; /* Exit the program. */
|
||||
}
|
||||
|
@ -307,7 +307,7 @@ sequence. */
|
|||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||
newline == PCRE2_NEWLINE_CRLF ||
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||
|
||||
/* Loop for second and subsequent matches */
|
||||
|
||||
|
|
|
@ -450,7 +450,7 @@ pcre2grep_exit(int rc)
|
|||
if (resource_error)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_RECURSIONLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||
}
|
||||
|
@ -485,7 +485,7 @@ if (strlen(s) > MAXPATLEN)
|
|||
{
|
||||
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
|
||||
MAXPATLEN);
|
||||
free(p);
|
||||
free(p);
|
||||
return NULL;
|
||||
}
|
||||
p->next = NULL;
|
||||
|
@ -2381,7 +2381,7 @@ switch(letter)
|
|||
unsigned char buffer[128];
|
||||
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
|
||||
fprintf(stdout, "pcre2grep version %s\n", buffer);
|
||||
}
|
||||
}
|
||||
pcre2grep_exit(0);
|
||||
break;
|
||||
|
||||
|
@ -2472,7 +2472,7 @@ if ((popts & PO_FIXED_STRINGS) != 0)
|
|||
}
|
||||
|
||||
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
|
||||
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
|
||||
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
|
||||
compile_context);
|
||||
if (p->compiled != NULL) return TRUE;
|
||||
|
||||
|
@ -2555,11 +2555,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
|
|||
afterwards, as a precaution against any later code trying to use it. */
|
||||
|
||||
*patlastptr = add_pattern(buffer, *patlastptr);
|
||||
if (*patlastptr == NULL)
|
||||
if (*patlastptr == NULL)
|
||||
{
|
||||
if (f != stdin) fclose(f);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
if (*patptr == NULL) *patptr = *patlastptr;
|
||||
|
||||
/* This loop is needed because compiling a "pattern" when -F is set may add
|
||||
|
@ -2571,10 +2571,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
|
|||
{
|
||||
if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
|
||||
linenumber))
|
||||
{
|
||||
{
|
||||
if (f != stdin) fclose(f);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
(*patlastptr)->string = NULL; /* Insurance */
|
||||
if ((*patlastptr)->next == NULL) break;
|
||||
*patlastptr = (*patlastptr)->next;
|
||||
|
@ -2622,7 +2622,7 @@ for (i = 1; i < argc; i++)
|
|||
char *option_data = (char *)""; /* default to keep compiler happy */
|
||||
BOOL longop;
|
||||
BOOL longopwasequals = FALSE;
|
||||
|
||||
|
||||
if (argv[i][0] != '-') break;
|
||||
|
||||
/* If we hit an argument that is just "-", it may be a reference to STDIN,
|
||||
|
@ -2925,7 +2925,7 @@ for (i = 1; i < argc; i++)
|
|||
else *((int *)op->dataptr) = n;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Options have been decoded. If -C was used, its value is used as a default
|
||||
for -A and -B. */
|
||||
|
||||
|
@ -2946,15 +2946,15 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) ||
|
|||
"and/or --line-offsets\n");
|
||||
pcre2grep_exit(usage(2));
|
||||
}
|
||||
|
||||
|
||||
/* Put limits into the match data block. */
|
||||
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
|
||||
|
||||
if (only_matching != NULL || file_offsets || line_offsets)
|
||||
show_only_matching = TRUE;
|
||||
|
||||
|
||||
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
||||
LC_ALL environment variable is set, and if so, use it. */
|
||||
|
||||
|
@ -2980,7 +2980,7 @@ if (locale != NULL)
|
|||
locale, locale_from);
|
||||
goto EXIT2;
|
||||
}
|
||||
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
|
||||
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
|
||||
}
|
||||
|
||||
/* Sort out colouring */
|
||||
|
@ -3007,27 +3007,27 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
|
|||
|
||||
if (newline_arg != NULL)
|
||||
{
|
||||
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
|
||||
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
|
||||
endlinetype++)
|
||||
{
|
||||
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
|
||||
}
|
||||
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
|
||||
pcre2_set_newline(compile_context, endlinetype);
|
||||
else
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
|
||||
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
|
||||
newline_arg);
|
||||
goto EXIT2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Find default newline convention */
|
||||
|
||||
|
||||
/* Find default newline convention */
|
||||
|
||||
else
|
||||
{
|
||||
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
|
||||
}
|
||||
}
|
||||
|
||||
/* Interpret the text values for -d and -D */
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ already set. */
|
|||
#include "pcre2_internal.h"
|
||||
#include "pcre2posix.h"
|
||||
|
||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
||||
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
||||
don't. */
|
||||
|
@ -89,7 +89,7 @@ static const int eint1[] = {
|
|||
REG_ASSERT, /* internal error: unexpected repeat */
|
||||
REG_BADPAT, /* unrecognized character after (? or (?- */
|
||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||
REG_EPAREN, /* missing ) */
|
||||
/* 15 */
|
||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||
|
@ -103,7 +103,7 @@ static const int eint1[] = {
|
|||
REG_EPAREN, /* unmatched closing parenthesis */
|
||||
REG_ASSERT /* internal error: code overflow */
|
||||
};
|
||||
|
||||
|
||||
static const int eint2[] = {
|
||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
|
||||
|
@ -216,14 +216,14 @@ if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
|
|||
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
||||
|
||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
|
||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
|
||||
&errorcode, &erroffset, NULL);
|
||||
preg->re_erroffset = erroffset;
|
||||
|
||||
if (preg->re_pcre2_code == NULL)
|
||||
{
|
||||
unsigned int i;
|
||||
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
||||
unsigned int i;
|
||||
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
||||
errorcode -= COMPILE_ERROR_BASE;
|
||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||
return eint1[errorcode];
|
||||
|
@ -232,7 +232,7 @@ if (preg->re_pcre2_code == NULL)
|
|||
return REG_BADPAT;
|
||||
}
|
||||
|
||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||
preg->re_nsub = (size_t)re_nsub;
|
||||
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||
|
@ -288,7 +288,7 @@ else
|
|||
eo = (int)strlen(string);
|
||||
}
|
||||
|
||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
||||
|
||||
/* Successful match */
|
||||
|
|
|
@ -95,7 +95,7 @@ enum {
|
|||
|
||||
typedef struct {
|
||||
void *re_pcre2_code;
|
||||
void *re_match_data;
|
||||
void *re_match_data;
|
||||
size_t re_nsub;
|
||||
size_t re_erroffset;
|
||||
} regex_t;
|
||||
|
|
|
@ -4797,9 +4797,9 @@ for (gmatched = 0;; gmatched++)
|
|||
PCRE2_SIZE length;
|
||||
uint32_t copybuffer[256];
|
||||
int namelen = strlen((const char *)nptr);
|
||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||
PCRE2_SIZE cnl = namelen;
|
||||
#endif
|
||||
#endif
|
||||
if (namelen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
|
@ -4864,9 +4864,9 @@ for (gmatched = 0;; gmatched++)
|
|||
void *gotbuffer;
|
||||
int rc;
|
||||
int namelen = strlen((const char *)nptr);
|
||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||
PCRE2_SIZE cnl = namelen;
|
||||
#endif
|
||||
#endif
|
||||
if (namelen == 0) break;
|
||||
|
||||
#ifdef SUPPORT_PCRE2_8
|
||||
|
@ -5389,25 +5389,25 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Get the PCRE2 and Unicode version number and JIT target information, at the
|
||||
same time checking that a request for the length gives the same answer. Also
|
||||
/* Get the PCRE2 and Unicode version number and JIT target information, at the
|
||||
same time checking that a request for the length gives the same answer. Also
|
||||
check lengths for non-string items. */
|
||||
|
||||
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
|
||||
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
|
||||
|
||||
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
|
||||
|
||||
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
|
||||
|
||||
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) ||
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
|
||||
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
|
||||
{
|
||||
fprintf(stderr, "** Error in pcre2_config(): bad length\n");
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get buffers from malloc() so that valgrind will check their misuse when
|
||||
debugging. They grow automatically when very long lines are read. The 16-
|
||||
|
|
Loading…
Reference in New Issue