Tidy a lot of files (remove trailing spaces)
This commit is contained in:
parent
4352f00bb9
commit
c3799e750f
|
@ -382,21 +382,21 @@ SET(PCRE2_SOURCES
|
||||||
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
${PROJECT_BINARY_DIR}/pcre2_chartables.c
|
||||||
src/pcre2_compile.c
|
src/pcre2_compile.c
|
||||||
src/pcre2_config.c
|
src/pcre2_config.c
|
||||||
src/pcre2_context.c
|
src/pcre2_context.c
|
||||||
src/pcre2_dfa_match.c
|
src/pcre2_dfa_match.c
|
||||||
src/pcre2_error.c
|
src/pcre2_error.c
|
||||||
src/pcre2_jit_compile.c
|
src/pcre2_jit_compile.c
|
||||||
src/pcre2_jit_match.c
|
src/pcre2_jit_match.c
|
||||||
src/pcre2_jit_misc.c
|
src/pcre2_jit_misc.c
|
||||||
src/pcre2_maketables.c
|
src/pcre2_maketables.c
|
||||||
src/pcre2_match.c
|
src/pcre2_match.c
|
||||||
src/pcre2_match_data.c
|
src/pcre2_match_data.c
|
||||||
src/pcre2_newline.c
|
src/pcre2_newline.c
|
||||||
src/pcre2_ord2utf.c
|
src/pcre2_ord2utf.c
|
||||||
src/pcre2_pattern_info.c
|
src/pcre2_pattern_info.c
|
||||||
src/pcre2_string_utils.c
|
src/pcre2_string_utils.c
|
||||||
src/pcre2_study.c
|
src/pcre2_study.c
|
||||||
src/pcre2_substring.c
|
src/pcre2_substring.c
|
||||||
src/pcre2_tables.c
|
src/pcre2_tables.c
|
||||||
src/pcre2_ucd.c
|
src/pcre2_ucd.c
|
||||||
src/pcre2_valid_utf.c
|
src/pcre2_valid_utf.c
|
||||||
|
@ -462,11 +462,11 @@ SET(targets)
|
||||||
IF(PCRE2_BUILD_PCRE2_8)
|
IF(PCRE2_BUILD_PCRE2_8)
|
||||||
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
SET_PROPERTY(TARGET pcre2-8
|
SET_PROPERTY(TARGET pcre2-8
|
||||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||||
SET(targets ${targets} pcre2-8)
|
SET(targets ${targets} pcre2-8)
|
||||||
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
|
||||||
SET_PROPERTY(TARGET pcre2posix
|
SET_PROPERTY(TARGET pcre2posix
|
||||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||||
SET(targets ${targets} pcre2posix)
|
SET(targets ${targets} pcre2posix)
|
||||||
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
|
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
|
||||||
|
|
||||||
|
@ -503,7 +503,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
|
||||||
IF(PCRE2_BUILD_PCRE2_32)
|
IF(PCRE2_BUILD_PCRE2_32)
|
||||||
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
|
||||||
SET_PROPERTY(TARGET pcre2-32
|
SET_PROPERTY(TARGET pcre2-32
|
||||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
|
||||||
SET(targets ${targets} pcre2-32)
|
SET(targets ${targets} pcre2-32)
|
||||||
|
|
||||||
IF(MINGW AND NOT PCRE2_STATIC)
|
IF(MINGW AND NOT PCRE2_STATIC)
|
||||||
|
@ -521,7 +521,7 @@ ENDIF(PCRE2_BUILD_PCRE2_32)
|
||||||
IF(PCRE2_BUILD_PCRE2GREP)
|
IF(PCRE2_BUILD_PCRE2GREP)
|
||||||
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
|
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
|
||||||
SET_PROPERTY(TARGET pcre2grep
|
SET_PROPERTY(TARGET pcre2grep
|
||||||
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
|
||||||
SET(targets ${targets} pcre2grep)
|
SET(targets ${targets} pcre2grep)
|
||||||
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
|
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
|
||||||
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
ENDIF(PCRE2_BUILD_PCRE2GREP)
|
||||||
|
|
38
ChangeLog
38
ChangeLog
|
@ -5,41 +5,41 @@ Version 10.0 xx-xxxx-2014
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||||
item 20 for release 8.36.
|
item 20 for release 8.36.
|
||||||
|
|
||||||
The code of the library was heavily revised as part of the new API
|
The code of the library was heavily revised as part of the new API
|
||||||
implementation. Details of each and every modification were not individually
|
implementation. Details of each and every modification were not individually
|
||||||
logged. In addition to the API changes, the following changes were made. They
|
logged. In addition to the API changes, the following changes were made. They
|
||||||
are either new functionality, or bug fixes and other noticeable changes of
|
are either new functionality, or bug fixes and other noticeable changes of
|
||||||
behaviour that were implemented after the code had been forked.
|
behaviour that were implemented after the code had been forked.
|
||||||
|
|
||||||
1. The test program, now called pcre2test, was re-specified and almost
|
1. The test program, now called pcre2test, was re-specified and almost
|
||||||
completely re-written. Its input is not compatible with input for pcretest.
|
completely re-written. Its input is not compatible with input for pcretest.
|
||||||
|
|
||||||
2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
|
2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
|
||||||
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
|
||||||
matched by that pattern.
|
matched by that pattern.
|
||||||
|
|
||||||
3. For the benefit of those who use PCRE2 via some other application, that is,
|
3. For the benefit of those who use PCRE2 via some other application, that is,
|
||||||
not writing the function calls themselves, it is possible to check the PCRE2
|
not writing the function calls themselves, it is possible to check the PCRE2
|
||||||
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
|
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
|
||||||
string such as "yesno".
|
string such as "yesno".
|
||||||
|
|
||||||
4. There are case-equivalent Unicode characters whose encodings use different
|
4. There are case-equivalent Unicode characters whose encodings use different
|
||||||
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
|
||||||
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
theoretically possible for this to happen in UTF-16 too.) If a backreference to
|
||||||
a group containing one of these characters was greedily repeated, and during
|
a group containing one of these characters was greedily repeated, and during
|
||||||
the match a backtrack occurred, the subject might be backtracked by the wrong
|
the match a backtrack occurred, the subject might be backtracked by the wrong
|
||||||
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
|
||||||
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
|
||||||
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
|
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
|
||||||
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
Incorrect backtracking meant that group 2 captured only the last two bytes.
|
||||||
This bug has been fixed; the new code is slower, but it is used only when the
|
This bug has been fixed; the new code is slower, but it is used only when the
|
||||||
strings matched by the repetition are not all the same length.
|
strings matched by the repetition are not all the same length.
|
||||||
|
|
||||||
5. A pattern such as /()a/ was not setting the "first character must be 'a'"
|
5. A pattern such as /()a/ was not setting the "first character must be 'a'"
|
||||||
information. This applied to any pattern with a group that matched no
|
information. This applied to any pattern with a group that matched no
|
||||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||||
|
|
||||||
****
|
****
|
||||||
|
|
2
NEWS
2
NEWS
|
@ -5,7 +5,7 @@ Version 10.0 xx-xxxx-2014
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
|
||||||
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
|
||||||
item 20 for release 8.36.
|
item 20 for release 8.36.
|
||||||
|
|
||||||
****
|
****
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
Building PCRE2 without using autotools
|
Building PCRE2 without using autotools
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
This document has been converted from the PCRE1 document, but is not yet
|
This document has been converted from the PCRE1 document, but is not yet
|
||||||
complete. I have removed a number of quite old sections about building in
|
complete. I have removed a number of quite old sections about building in
|
||||||
various environments, as they applied only to PCRE1 and are probably out of
|
various environments, as they applied only to PCRE1 and are probably out of
|
||||||
date.
|
date.
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
|
||||||
environment. In particular, you can alter the definition of the NEWLINE
|
environment. In particular, you can alter the definition of the NEWLINE
|
||||||
macro to specify what character(s) you want to be interpreted as line
|
macro to specify what character(s) you want to be interpreted as line
|
||||||
terminators.
|
terminators.
|
||||||
|
|
||||||
When you compile any of the PCRE2 modules, you must specify
|
When you compile any of the PCRE2 modules, you must specify
|
||||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||||
sources.
|
sources.
|
||||||
|
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_chartables.c
|
pcre2_chartables.c
|
||||||
pcre2_compile.c
|
pcre2_compile.c
|
||||||
pcre2_config.c
|
pcre2_config.c
|
||||||
pcre2_context.c
|
pcre2_context.c
|
||||||
pcre2_dfa_match.c
|
pcre2_dfa_match.c
|
||||||
pcre2_error.c
|
pcre2_error.c
|
||||||
pcre2_jit_compile.c
|
pcre2_jit_compile.c
|
||||||
|
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_pattern_info.c
|
pcre2_pattern_info.c
|
||||||
pcre2_string_utils.c
|
pcre2_string_utils.c
|
||||||
pcre2_study.c
|
pcre2_study.c
|
||||||
pcre2_substring.c
|
pcre2_substring.c
|
||||||
pcre2_tables.c
|
pcre2_tables.c
|
||||||
pcre2_ucd.c
|
pcre2_ucd.c
|
||||||
pcre2_valid_utf.c
|
pcre2_valid_utf.c
|
||||||
|
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
|
||||||
|
|
||||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||||
|
|
||||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||||
8-bit library), ensure that you have the pcre2posix.h file and then
|
8-bit library), ensure that you have the pcre2posix.h file and then
|
||||||
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||||
|
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||||
directories.
|
directories.
|
||||||
|
|
||||||
The following instructions were contributed by a PCRE1 user, but they should
|
The following instructions were contributed by a PCRE1 user, but they should
|
||||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||||
event that errors do occur, it is recommended that you delete the CMake cache
|
event that errors do occur, it is recommended that you delete the CMake cache
|
||||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||||
|
@ -394,9 +394,9 @@ required. For details, please see this web site:
|
||||||
There is also a mirror here:
|
There is also a mirror here:
|
||||||
|
|
||||||
http://www.vsoft-software.com/downloads.html
|
http://www.vsoft-software.com/downloads.html
|
||||||
|
|
||||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||||
course.
|
course.
|
||||||
|
|
||||||
==========================
|
==========================
|
||||||
Last Updated: 28 September 2014
|
Last Updated: 28 September 2014
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
|
|
||||||
# README & NON-AUTOTOOLS-BUILD
|
# README & NON-AUTOTOOLS-BUILD
|
||||||
# These files are copied into the doc/html directory, with .txt
|
# These files are copied into the doc/html directory, with .txt
|
||||||
# extensions so that they can be hyperlinked from the HTML
|
# extensions so that they can be hyperlinked from the HTML
|
||||||
# documentation, because some people just go to the HTML without
|
# documentation, because some people just go to the HTML without
|
||||||
# looking for text files.
|
# looking for text files.
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
|
||||||
# pcre2syntax \
|
# pcre2syntax \
|
||||||
# pcre2precompile pcre2perform pcre2posix pcre2sample \
|
# pcre2precompile pcre2perform pcre2posix pcre2sample \
|
||||||
# pcre2stack ; do
|
# pcre2stack ; do
|
||||||
|
|
||||||
echo " Processing $file.3"
|
echo " Processing $file.3"
|
||||||
nroff -c -man $file.3 >$file.rawtxt
|
nroff -c -man $file.3 >$file.rawtxt
|
||||||
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
|
||||||
|
@ -168,17 +168,13 @@ cd ..
|
||||||
echo Documentation done
|
echo Documentation done
|
||||||
if [ "$1" = "doc" ] ; then exit; fi
|
if [ "$1" = "doc" ] ; then exit; fi
|
||||||
|
|
||||||
# FIXME pro tem only do docs
|
|
||||||
exit
|
|
||||||
|
|
||||||
# These files are detrailed; do not detrail the test data because there may be
|
# These files are detrailed; do not detrail the test data because there may be
|
||||||
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
|
||||||
# line endings and the detrail script removes all trailing white space. The
|
# line endings and the detrail script removes all trailing white space. The
|
||||||
# configure files are also omitted from the detrailing.
|
# configure files are also omitted from the detrailing.
|
||||||
|
|
||||||
files="\
|
files="\
|
||||||
Makefile.am \
|
Makefile.am \
|
||||||
Makefile.in \
|
|
||||||
configure.ac \
|
configure.ac \
|
||||||
README \
|
README \
|
||||||
LICENCE \
|
LICENCE \
|
||||||
|
@ -195,54 +191,45 @@ files="\
|
||||||
RunGrepTest \
|
RunGrepTest \
|
||||||
RunTest \
|
RunTest \
|
||||||
pcre2-config.in \
|
pcre2-config.in \
|
||||||
libpcre.pc.in \
|
|
||||||
libpcre16.pc.in \
|
|
||||||
libpcre32.pc.in \
|
|
||||||
libpcreposix.pc.in \
|
|
||||||
libpcrecpp.pc.in \
|
|
||||||
config.h.in \
|
|
||||||
pcre2_chartables.c.dist \
|
|
||||||
pcre2demo.c \
|
|
||||||
pcre2grep.c \
|
|
||||||
pcre2test.c \
|
|
||||||
dftables.c \
|
|
||||||
pcre2posix.c \
|
|
||||||
pcre2posix.h \
|
|
||||||
pcre2.h.in \
|
|
||||||
pcre2_internal.h \
|
|
||||||
pcre2_byte_order.c \
|
|
||||||
pcre2_compile.c \
|
|
||||||
pcre2_config.c \
|
|
||||||
pcre2_dfa_exec.c \
|
|
||||||
pcre2_exec.c \
|
|
||||||
pcre2_fullinfo.c \
|
|
||||||
pcre2_get.c \
|
|
||||||
pcre2_globals.c \
|
|
||||||
pcre2_jit_compile.c \
|
|
||||||
pcre2_jit_test.c \
|
|
||||||
pcre2_maketables.c \
|
|
||||||
pcre2_newline.c \
|
|
||||||
pcre2_ord2utf8.c \
|
|
||||||
pcre16_ord2utf16.c \
|
|
||||||
pcre32_ord2utf32.c \
|
|
||||||
pcre2_printint.c \
|
|
||||||
pcre2_refcount.c \
|
|
||||||
pcre2_string_utils.c \
|
|
||||||
pcre2_study.c \
|
|
||||||
pcre2_tables.c \
|
|
||||||
pcre2_valid_utf8.c \
|
|
||||||
pcre2_version.c \
|
|
||||||
pcre2_xclass.c \
|
|
||||||
pcre16_utf16_utils.c \
|
|
||||||
pcre32_utf32_utils.c \
|
|
||||||
pcre16_valid_utf16.c \
|
|
||||||
pcre32_valid_utf32.c \
|
|
||||||
perltest.pl \
|
perltest.pl \
|
||||||
ucp.h \
|
libpcre2-8.pc.in \
|
||||||
makevp.bat \
|
libpcre2-16.pc.in \
|
||||||
pcre.def \
|
libpcre2-32.pc.in \
|
||||||
libpcre.def \
|
libpcre2-posix.pc.in \
|
||||||
libpcreposix.def"
|
src/dftables.c \
|
||||||
|
src/pcre2.h.in \
|
||||||
|
src/pcre2_auto_possess.c \
|
||||||
|
src/pcre2_compile.c \
|
||||||
|
src/pcre2_config.c \
|
||||||
|
src/pcre2_context.c \
|
||||||
|
src/pcre2_dfa_match.c \
|
||||||
|
src/pcre2_error.c \
|
||||||
|
src/pcre2_internal.h \
|
||||||
|
src/pcre2_intmodedep.h \
|
||||||
|
src/pcre2_jit_compile.c \
|
||||||
|
src/pcre2_jit_match.c \
|
||||||
|
src/pcre2_jit_misc.c \
|
||||||
|
src/pcre2_jit_test.c \
|
||||||
|
src/pcre2_maketables.c \
|
||||||
|
src/pcre2_match.c \
|
||||||
|
src/pcre2_match_data.c \
|
||||||
|
src/pcre2_newline.c \
|
||||||
|
src/pcre2_ord2utf.c \
|
||||||
|
src/pcre2_pattern_info.c \
|
||||||
|
src/pcre2_printint.c \
|
||||||
|
src/pcre2_string_utils.c \
|
||||||
|
src/pcre2_study.c \
|
||||||
|
src/pcre2_substring.c \
|
||||||
|
src/pcre2_tables.c \
|
||||||
|
src/pcre2_ucd.c \
|
||||||
|
src/pcre2_ucp.h \
|
||||||
|
src/pcre2_valid_utf.c \
|
||||||
|
src/pcre2_xclass.c \
|
||||||
|
src/pcre2demo.c \
|
||||||
|
src/pcre2grep.c \
|
||||||
|
src/pcre2posix.c \
|
||||||
|
src/pcre2posix.h \
|
||||||
|
src/pcre2test.c"
|
||||||
|
|
||||||
echo Detrailing
|
echo Detrailing
|
||||||
perl ./Detrail $files doc/p* doc/html/*
|
perl ./Detrail $files doc/p* doc/html/*
|
||||||
|
|
46
README
46
README
|
@ -1,7 +1,7 @@
|
||||||
README file for PCRE2 (Perl-compatible regular expression library)
|
README file for PCRE2 (Perl-compatible regular expression library)
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||||
API. The latest release of PCRE2 is always available in three alternative
|
API. The latest release of PCRE2 is always available in three alternative
|
||||||
formats from:
|
formats from:
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||||
|
|
||||||
There is a mailing list for discussion about the development of PCRE (both the
|
There is a mailing list for discussion about the development of PCRE (both the
|
||||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||||
subscribe or manage your subscription here:
|
subscribe or manage your subscription here:
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ The PCRE2 APIs
|
||||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||||
there are as yet no C++ wrappers.
|
there are as yet no C++ wrappers.
|
||||||
|
|
||||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||||
|
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||||
many Unix-like systems.
|
many Unix-like systems.
|
||||||
|
|
||||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||||
|
|
||||||
|
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
|
||||||
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
||||||
configure one library with UTF support and the other without in the same
|
configure one library with UTF support and the other without in the same
|
||||||
configuration.
|
configuration.
|
||||||
|
|
||||||
Even when --enable-unicode is included, the use of a UTF encoding still has
|
Even when --enable-unicode is included, the use of a UTF encoding still has
|
||||||
to be enabled by an option at run time. When PCRE2 is compiled with this
|
to be enabled by an option at run time. When PCRE2 is compiled with this
|
||||||
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
||||||
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
||||||
--enable-ebcdic at the same time.
|
--enable-ebcdic at the same time.
|
||||||
|
|
||||||
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
||||||
includes support for the \P, \p, and \X sequences that recognize Unicode
|
includes support for the \P, \p, and \X sequences that recognize Unicode
|
||||||
character properties. However, only the basic two-letter properties such as
|
character properties. However, only the basic two-letter properties such as
|
||||||
|
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
|
||||||
cause programs to crash in strange ways. There is a discussion about stack
|
cause programs to crash in strange ways. There is a discussion about stack
|
||||||
sizes in the pcre2stack man page.
|
sizes in the pcre2stack man page.
|
||||||
|
|
||||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||||
|
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
|
||||||
. src/pcre2.h the public PCRE2 header file
|
. src/pcre2.h the public PCRE2 header file
|
||||||
. pcre2-config script that shows the building settings such as CFLAGS
|
. pcre2-config script that shows the building settings such as CFLAGS
|
||||||
that were set for "configure"
|
that were set for "configure"
|
||||||
. libpcre2-8.pc )
|
. libpcre2-8.pc )
|
||||||
. libpcre2-16.pc ) data for the pkg-config command
|
. libpcre2-16.pc ) data for the pkg-config command
|
||||||
. libpcre2-32.pc )
|
. libpcre2-32.pc )
|
||||||
. libpcre2-posix.pc )
|
. libpcre2-posix.pc )
|
||||||
|
@ -452,7 +452,7 @@ prints the version number, and
|
||||||
|
|
||||||
outputs information about where the 8-bit library is installed. This command
|
outputs information about where the 8-bit library is installed. This command
|
||||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||||
from having to remember too many details. Run pcre2-config with no arguments to
|
from having to remember too many details. Run pcre2-config with no arguments to
|
||||||
obtain a list of possible arguments.
|
obtain a list of possible arguments.
|
||||||
|
|
||||||
The pkg-config command is another system for saving and retrieving information
|
The pkg-config command is another system for saving and retrieving information
|
||||||
|
@ -593,7 +593,7 @@ bug in PCRE2.
|
||||||
|
|
||||||
The third set of tests checks pcre2_maketables(), the facility for building a
|
The third set of tests checks pcre2_maketables(), the facility for building a
|
||||||
set of character tables for a specific locale and using them instead of the
|
set of character tables for a specific locale and using them instead of the
|
||||||
default tables. The script uses the "locale" command to check for the
|
default tables. The script uses the "locale" command to check for the
|
||||||
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
||||||
that it finds. If the "locale" command fails, or if its output doesn't include
|
that it finds. If the "locale" command fails, or if its output doesn't include
|
||||||
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
||||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
||||||
matches any one of them.
|
matches any one of them.
|
||||||
|
|
||||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||||
being compatible with the perltest.pl script, and the fifth checking
|
being compatible with the perltest.pl script, and the fifth checking
|
||||||
PCRE2-specific things.
|
PCRE2-specific things.
|
||||||
|
|
||||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||||
|
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
|
||||||
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
||||||
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
||||||
generate different output in 8-bit mode. Each pair are for general cases and
|
generate different output in 8-bit mode. Each pair are for general cases and
|
||||||
Unicode support, respectively. The thirteenth test checks the handling of
|
Unicode support, respectively. The thirteenth test checks the handling of
|
||||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||||
modes.
|
modes.
|
||||||
|
|
||||||
The fourteenth test is run only when JIT support is not available, and the
|
The fourteenth test is run only when JIT support is not available, and the
|
||||||
|
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
|
||||||
JIT-specific features such as information output from pcre2test about JIT
|
JIT-specific features such as information output from pcre2test about JIT
|
||||||
compilation.
|
compilation.
|
||||||
|
|
||||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||||
respectively.
|
respectively.
|
||||||
|
|
||||||
|
|
||||||
|
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
|
||||||
File manifest
|
File manifest
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
The distribution should contain the files listed below.
|
The distribution should contain the files listed below.
|
||||||
|
|
||||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||||
the src directory:
|
the src directory:
|
||||||
|
|
||||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||||
|
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
|
||||||
specified, used by copying to pcre2_chartables.c
|
specified, used by copying to pcre2_chartables.c
|
||||||
|
|
||||||
src/pcre2posix.c )
|
src/pcre2posix.c )
|
||||||
src/pcre2_auto_possess.c )
|
src/pcre2_auto_possess.c )
|
||||||
src/pcre2_compile.c )
|
src/pcre2_compile.c )
|
||||||
src/pcre2_config.c )
|
src/pcre2_config.c )
|
||||||
src/pcre2_context.c )
|
src/pcre2_context.c )
|
||||||
src/pcre2_dfa_match.c )
|
src/pcre2_dfa_match.c )
|
||||||
src/pcre2_error.c )
|
src/pcre2_error.c )
|
||||||
src/pcre2_exec.c )
|
src/pcre2_exec.c )
|
||||||
src/pcre2_jit_compile.c )
|
src/pcre2_jit_compile.c )
|
||||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||||
src/pcre2_maketables.c )
|
src/pcre2_maketables.c )
|
||||||
src/pcre2_match.c )
|
src/pcre2_match.c )
|
||||||
src/pcre2_match_data.c )
|
src/pcre2_match_data.c )
|
||||||
src/pcre2_newline.c )
|
src/pcre2_newline.c )
|
||||||
src/pcre2_ord2utf.c )
|
src/pcre2_ord2utf.c )
|
||||||
src/pcre2_pattern_info.c )
|
src/pcre2_pattern_info.c )
|
||||||
src/pcre2_string_utils.c )
|
src/pcre2_string_utils.c )
|
||||||
src/pcre2_study.c )
|
src/pcre2_study.c )
|
||||||
src/pcre2_substring.c )
|
src/pcre2_substring.c )
|
||||||
src/pcre2_tables.c )
|
src/pcre2_tables.c )
|
||||||
src/pcre2_ucd.c )
|
src/pcre2_ucd.c )
|
||||||
src/pcre2_valid_utf.c )
|
src/pcre2_valid_utf.c )
|
||||||
|
|
|
@ -23,7 +23,7 @@ pcre2grep=$builddir/pcre2grep
|
||||||
if [ ! -x $pcre2grep ] ; then
|
if [ ! -x $pcre2grep ] ; then
|
||||||
echo "** $pcre2grep does not exist or is not execuatble."
|
echo "** $pcre2grep does not exist or is not execuatble."
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
valgrind=
|
valgrind=
|
||||||
while [ $# -gt 0 ] ; do
|
while [ $# -gt 0 ] ; do
|
||||||
|
|
2
RunTest
2
RunTest
|
@ -126,7 +126,7 @@ fi
|
||||||
|
|
||||||
checkresult()
|
checkresult()
|
||||||
{
|
{
|
||||||
if [ $1 -ne 0 ] ; then
|
if [ $1 -ne 0 ] ; then
|
||||||
echo "** pcre2test failed - check testtry"
|
echo "** pcre2test failed - check testtry"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
16
configure.ac
16
configure.ac
|
@ -106,7 +106,7 @@ AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
|
||||||
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
|
||||||
then
|
then
|
||||||
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Handle --disable-pcre2-8 (enabled by default)
|
# Handle --disable-pcre2-8 (enabled by default)
|
||||||
|
@ -512,7 +512,7 @@ if test "$enable_jit" = "yes"; then
|
||||||
CC="$PTHREAD_CC"
|
CC="$PTHREAD_CC"
|
||||||
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
|
||||||
LIBS="$PTHREAD_LIBS $LIBS"
|
LIBS="$PTHREAD_LIBS $LIBS"
|
||||||
fi
|
fi
|
||||||
AC_DEFINE([SUPPORT_JIT], [], [
|
AC_DEFINE([SUPPORT_JIT], [], [
|
||||||
Define to any value to enable support for Just-In-Time compiling.])
|
Define to any value to enable support for Just-In-Time compiling.])
|
||||||
else
|
else
|
||||||
|
@ -538,7 +538,7 @@ if test "$enable_stack_for_recursion" = "no"; then
|
||||||
matching. This can sometimes be a problem on systems that have
|
matching. This can sometimes be a problem on systems that have
|
||||||
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
|
||||||
version that doesn't use recursion in the match() function; instead
|
version that doesn't use recursion in the match() function; instead
|
||||||
it creates its own stack by steam using memory from the heap. For more
|
it creates its own stack by steam using memory from the heap. For more
|
||||||
detail, see the comments and other stuff just above the match() function.])
|
detail, see the comments and other stuff just above the match() function.])
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -559,8 +559,8 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then
|
||||||
with_pcre2grep_bufsize="8192"
|
with_pcre2grep_bufsize="8192"
|
||||||
else
|
else
|
||||||
if test $? -gt 1 ; then
|
if test $? -gt 1 ; then
|
||||||
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||||
|
@ -579,9 +579,9 @@ elif test "$enable_pcre2test_libreadline" = "yes"; then
|
||||||
fi
|
fi
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
|
||||||
The value of NEWLINE_DEFAULT determines the default newline character
|
The value of NEWLINE_DEFAULT determines the default newline character
|
||||||
sequence. PCRE2 client programs can override this by selecting other values
|
sequence. PCRE2 client programs can override this by selecting other values
|
||||||
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
|
||||||
and 5 (ANYCRLF).])
|
and 5 (ANYCRLF).])
|
||||||
|
|
||||||
if test "$enable_bsr_anycrlf" = "yes"; then
|
if test "$enable_bsr_anycrlf" = "yes"; then
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
Building PCRE2 without using autotools
|
Building PCRE2 without using autotools
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
This document has been converted from the PCRE1 document, but is not yet
|
This document has been converted from the PCRE1 document, but is not yet
|
||||||
complete. I have removed a number of quite old sections about building in
|
complete. I have removed a number of quite old sections about building in
|
||||||
various environments, as they applied only to PCRE1 and are probably out of
|
various environments, as they applied only to PCRE1 and are probably out of
|
||||||
date.
|
date.
|
||||||
|
|
||||||
|
|
||||||
|
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
|
||||||
environment. In particular, you can alter the definition of the NEWLINE
|
environment. In particular, you can alter the definition of the NEWLINE
|
||||||
macro to specify what character(s) you want to be interpreted as line
|
macro to specify what character(s) you want to be interpreted as line
|
||||||
terminators.
|
terminators.
|
||||||
|
|
||||||
When you compile any of the PCRE2 modules, you must specify
|
When you compile any of the PCRE2 modules, you must specify
|
||||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||||
sources.
|
sources.
|
||||||
|
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_chartables.c
|
pcre2_chartables.c
|
||||||
pcre2_compile.c
|
pcre2_compile.c
|
||||||
pcre2_config.c
|
pcre2_config.c
|
||||||
pcre2_context.c
|
pcre2_context.c
|
||||||
pcre2_dfa_match.c
|
pcre2_dfa_match.c
|
||||||
pcre2_error.c
|
pcre2_error.c
|
||||||
pcre2_jit_compile.c
|
pcre2_jit_compile.c
|
||||||
|
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_pattern_info.c
|
pcre2_pattern_info.c
|
||||||
pcre2_string_utils.c
|
pcre2_string_utils.c
|
||||||
pcre2_study.c
|
pcre2_study.c
|
||||||
pcre2_substring.c
|
pcre2_substring.c
|
||||||
pcre2_tables.c
|
pcre2_tables.c
|
||||||
pcre2_ucd.c
|
pcre2_ucd.c
|
||||||
pcre2_valid_utf.c
|
pcre2_valid_utf.c
|
||||||
|
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
|
||||||
|
|
||||||
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
|
||||||
instead of the 8-bit library) just supply 16 or 32 as the value of
|
instead of the 8-bit library) just supply 16 or 32 as the value of
|
||||||
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
|
||||||
|
|
||||||
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
(7) If you want to build the POSIX wrapper functions (which apply only to the
|
||||||
8-bit library), ensure that you have the pcre2posix.h file and then
|
8-bit library), ensure that you have the pcre2posix.h file and then
|
||||||
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
|
||||||
|
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
|
||||||
spaces in the names for your CMake installation and your PCRE2 source and build
|
spaces in the names for your CMake installation and your PCRE2 source and build
|
||||||
directories.
|
directories.
|
||||||
|
|
||||||
The following instructions were contributed by a PCRE1 user, but they should
|
The following instructions were contributed by a PCRE1 user, but they should
|
||||||
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
also work for PCRE2. If they are not followed exactly, errors may occur. In the
|
||||||
event that errors do occur, it is recommended that you delete the CMake cache
|
event that errors do occur, it is recommended that you delete the CMake cache
|
||||||
before attempting to repeat the CMake build process. In the CMake GUI, the
|
before attempting to repeat the CMake build process. In the CMake GUI, the
|
||||||
|
@ -394,9 +394,9 @@ required. For details, please see this web site:
|
||||||
There is also a mirror here:
|
There is also a mirror here:
|
||||||
|
|
||||||
http://www.vsoft-software.com/downloads.html
|
http://www.vsoft-software.com/downloads.html
|
||||||
|
|
||||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||||
course.
|
course.
|
||||||
|
|
||||||
==========================
|
==========================
|
||||||
Last Updated: 28 September 2014
|
Last Updated: 28 September 2014
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
README file for PCRE2 (Perl-compatible regular expression library)
|
README file for PCRE2 (Perl-compatible regular expression library)
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
PCRE2 is a re-implementation of the original PCRE library with an entirely new
|
||||||
API. The latest release of PCRE2 is always available in three alternative
|
API. The latest release of PCRE2 is always available in three alternative
|
||||||
formats from:
|
formats from:
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
|
||||||
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
|
||||||
|
|
||||||
There is a mailing list for discussion about the development of PCRE (both the
|
There is a mailing list for discussion about the development of PCRE (both the
|
||||||
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
original and new APIs) at pcre-dev@exim.org. You can access the archives and
|
||||||
subscribe or manage your subscription here:
|
subscribe or manage your subscription here:
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ The PCRE2 APIs
|
||||||
PCRE2 is written in C, and it has its own API. There are three sets of
|
PCRE2 is written in C, and it has its own API. There are three sets of
|
||||||
functions, one for the 8-bit library, which processes strings of bytes, one for
|
functions, one for the 8-bit library, which processes strings of bytes, one for
|
||||||
the 16-bit library, which processes strings of 16-bit values, and one for the
|
the 16-bit library, which processes strings of 16-bit values, and one for the
|
||||||
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
32-bit library, which processes strings of 32-bit values. As this is a new API,
|
||||||
there are as yet no C++ wrappers.
|
there are as yet no C++ wrappers.
|
||||||
|
|
||||||
The distribution does contain a set of C wrapper functions for the 8-bit
|
The distribution does contain a set of C wrapper functions for the 8-bit
|
||||||
|
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
|
||||||
"make" you may be able to build PCRE2 using autotools in the same way as for
|
"make" you may be able to build PCRE2 using autotools in the same way as for
|
||||||
many Unix-like systems.
|
many Unix-like systems.
|
||||||
|
|
||||||
PCRE2 can also be configured using CMake, which can be run in various ways
|
PCRE2 can also be configured using CMake, which can be run in various ways
|
||||||
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
|
||||||
NON-AUTOTOOLS-BUILD has information about CMake.
|
NON-AUTOTOOLS-BUILD has information about CMake.
|
||||||
|
|
||||||
|
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
|
||||||
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
|
||||||
configure one library with UTF support and the other without in the same
|
configure one library with UTF support and the other without in the same
|
||||||
configuration.
|
configuration.
|
||||||
|
|
||||||
Even when --enable-unicode is included, the use of a UTF encoding still has
|
Even when --enable-unicode is included, the use of a UTF encoding still has
|
||||||
to be enabled by an option at run time. When PCRE2 is compiled with this
|
to be enabled by an option at run time. When PCRE2 is compiled with this
|
||||||
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
option, its input can only either be ASCII or UTF-8/16/32, even when running
|
||||||
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
on EBCDIC platforms. It is not possible to use both --enable-unicode and
|
||||||
--enable-ebcdic at the same time.
|
--enable-ebcdic at the same time.
|
||||||
|
|
||||||
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
|
||||||
includes support for the \P, \p, and \X sequences that recognize Unicode
|
includes support for the \P, \p, and \X sequences that recognize Unicode
|
||||||
character properties. However, only the basic two-letter properties such as
|
character properties. However, only the basic two-letter properties such as
|
||||||
|
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
|
||||||
cause programs to crash in strange ways. There is a discussion about stack
|
cause programs to crash in strange ways. There is a discussion about stack
|
||||||
sizes in the pcre2stack man page.
|
sizes in the pcre2stack man page.
|
||||||
|
|
||||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||||
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
64K. You can increase this by adding --with-link-size=3 to the "configure"
|
||||||
command. PCRE2 then uses three bytes instead of two for offsets to different
|
command. PCRE2 then uses three bytes instead of two for offsets to different
|
||||||
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
|
||||||
|
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
|
||||||
. src/pcre2.h the public PCRE2 header file
|
. src/pcre2.h the public PCRE2 header file
|
||||||
. pcre2-config script that shows the building settings such as CFLAGS
|
. pcre2-config script that shows the building settings such as CFLAGS
|
||||||
that were set for "configure"
|
that were set for "configure"
|
||||||
. libpcre2-8.pc )
|
. libpcre2-8.pc )
|
||||||
. libpcre2-16.pc ) data for the pkg-config command
|
. libpcre2-16.pc ) data for the pkg-config command
|
||||||
. libpcre2-32.pc )
|
. libpcre2-32.pc )
|
||||||
. libpcre2-posix.pc )
|
. libpcre2-posix.pc )
|
||||||
|
@ -452,7 +452,7 @@ prints the version number, and
|
||||||
|
|
||||||
outputs information about where the 8-bit library is installed. This command
|
outputs information about where the 8-bit library is installed. This command
|
||||||
can be included in makefiles for programs that use PCRE2, saving the programmer
|
can be included in makefiles for programs that use PCRE2, saving the programmer
|
||||||
from having to remember too many details. Run pcre2-config with no arguments to
|
from having to remember too many details. Run pcre2-config with no arguments to
|
||||||
obtain a list of possible arguments.
|
obtain a list of possible arguments.
|
||||||
|
|
||||||
The pkg-config command is another system for saving and retrieving information
|
The pkg-config command is another system for saving and retrieving information
|
||||||
|
@ -593,7 +593,7 @@ bug in PCRE2.
|
||||||
|
|
||||||
The third set of tests checks pcre2_maketables(), the facility for building a
|
The third set of tests checks pcre2_maketables(), the facility for building a
|
||||||
set of character tables for a specific locale and using them instead of the
|
set of character tables for a specific locale and using them instead of the
|
||||||
default tables. The script uses the "locale" command to check for the
|
default tables. The script uses the "locale" command to check for the
|
||||||
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
|
||||||
that it finds. If the "locale" command fails, or if its output doesn't include
|
that it finds. If the "locale" command fails, or if its output doesn't include
|
||||||
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
"fr_FR", "french", or "fr" in the list of available locales, the third test
|
||||||
|
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
|
||||||
matches any one of them.
|
matches any one of them.
|
||||||
|
|
||||||
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
The fourth and fifth tests check UTF and Unicode property support, the fourth
|
||||||
being compatible with the perltest.pl script, and the fifth checking
|
being compatible with the perltest.pl script, and the fifth checking
|
||||||
PCRE2-specific things.
|
PCRE2-specific things.
|
||||||
|
|
||||||
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
|
||||||
|
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
|
||||||
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
|
||||||
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
|
||||||
generate different output in 8-bit mode. Each pair are for general cases and
|
generate different output in 8-bit mode. Each pair are for general cases and
|
||||||
Unicode support, respectively. The thirteenth test checks the handling of
|
Unicode support, respectively. The thirteenth test checks the handling of
|
||||||
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
|
||||||
modes.
|
modes.
|
||||||
|
|
||||||
The fourteenth test is run only when JIT support is not available, and the
|
The fourteenth test is run only when JIT support is not available, and the
|
||||||
|
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
|
||||||
JIT-specific features such as information output from pcre2test about JIT
|
JIT-specific features such as information output from pcre2test about JIT
|
||||||
compilation.
|
compilation.
|
||||||
|
|
||||||
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
|
||||||
POSIX interface to the 8-bit library, without and with Unicode support,
|
POSIX interface to the 8-bit library, without and with Unicode support,
|
||||||
respectively.
|
respectively.
|
||||||
|
|
||||||
|
|
||||||
|
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
|
||||||
File manifest
|
File manifest
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
The distribution should contain the files listed below.
|
The distribution should contain the files listed below.
|
||||||
|
|
||||||
(A) Source files for the PCRE2 library functions and their headers are found in
|
(A) Source files for the PCRE2 library functions and their headers are found in
|
||||||
the src directory:
|
the src directory:
|
||||||
|
|
||||||
src/dftables.c auxiliary program for building pcre2_chartables.c
|
src/dftables.c auxiliary program for building pcre2_chartables.c
|
||||||
|
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
|
||||||
specified, used by copying to pcre2_chartables.c
|
specified, used by copying to pcre2_chartables.c
|
||||||
|
|
||||||
src/pcre2posix.c )
|
src/pcre2posix.c )
|
||||||
src/pcre2_auto_possess.c )
|
src/pcre2_auto_possess.c )
|
||||||
src/pcre2_compile.c )
|
src/pcre2_compile.c )
|
||||||
src/pcre2_config.c )
|
src/pcre2_config.c )
|
||||||
src/pcre2_context.c )
|
src/pcre2_context.c )
|
||||||
src/pcre2_dfa_match.c )
|
src/pcre2_dfa_match.c )
|
||||||
src/pcre2_error.c )
|
src/pcre2_error.c )
|
||||||
src/pcre2_exec.c )
|
src/pcre2_exec.c )
|
||||||
src/pcre2_jit_compile.c )
|
src/pcre2_jit_compile.c )
|
||||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||||
src/pcre2_jit_misc.c ) and some internal functions that they use
|
src/pcre2_jit_misc.c ) and some internal functions that they use
|
||||||
src/pcre2_maketables.c )
|
src/pcre2_maketables.c )
|
||||||
src/pcre2_match.c )
|
src/pcre2_match.c )
|
||||||
src/pcre2_match_data.c )
|
src/pcre2_match_data.c )
|
||||||
src/pcre2_newline.c )
|
src/pcre2_newline.c )
|
||||||
src/pcre2_ord2utf.c )
|
src/pcre2_ord2utf.c )
|
||||||
src/pcre2_pattern_info.c )
|
src/pcre2_pattern_info.c )
|
||||||
src/pcre2_string_utils.c )
|
src/pcre2_string_utils.c )
|
||||||
src/pcre2_study.c )
|
src/pcre2_study.c )
|
||||||
src/pcre2_substring.c )
|
src/pcre2_substring.c )
|
||||||
src/pcre2_tables.c )
|
src/pcre2_tables.c )
|
||||||
src/pcre2_ucd.c )
|
src/pcre2_ucd.c )
|
||||||
src/pcre2_valid_utf.c )
|
src/pcre2_valid_utf.c )
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
<html>
|
<html>
|
||||||
<!-- This is a manually maintained file that is the root of the HTML version of
|
<!-- This is a manually maintained file that is the root of the HTML version of
|
||||||
the PCRE2 documentation. When the HTML documents are built from the man
|
the PCRE2 documentation. When the HTML documents are built from the man
|
||||||
page versions, the entire doc/html directory is emptied, this file is then
|
page versions, the entire doc/html directory is emptied, this file is then
|
||||||
copied into doc/html/index.html, and the remaining files therein are
|
copied into doc/html/index.html, and the remaining files therein are
|
||||||
created by the 132html script.
|
created by the 132html script.
|
||||||
-->
|
-->
|
||||||
<head>
|
<head>
|
||||||
<title>PCRE2 specification</title>
|
<title>PCRE2 specification</title>
|
||||||
</head>
|
</head>
|
||||||
|
@ -87,7 +87,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
|
||||||
functions.
|
functions.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
|
<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
|
||||||
<td> Assign stack for JIT matching</td></tr>
|
<td> Assign stack for JIT matching</td></tr>
|
||||||
|
@ -153,7 +153,7 @@ functions.
|
||||||
|
|
||||||
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
|
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
|
||||||
<td> Build character tables in current locale</td></tr>
|
<td> Build character tables in current locale</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
|
<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
|
||||||
<td> Convert compiled pattern to host byte order if necessary</td></tr>
|
<td> Convert compiled pattern to host byte order if necessary</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -43,11 +43,11 @@ of Unicode in use can be discovered by running
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The three libraries contain identical sets of functions, with names ending in
|
The three libraries contain identical sets of functions, with names ending in
|
||||||
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
|
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
|
||||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||||
one code unit width can be written using generic names such as
|
one code unit width can be written using generic names such as
|
||||||
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
|
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
|
||||||
the case.
|
the case.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -306,7 +306,7 @@ unknown should also use the real function names. (Unfortunately, it is not
|
||||||
possible in C code to save and restore the value of a macro.)
|
possible in C code to save and restore the value of a macro.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
|
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
|
||||||
compiler error occurs.
|
compiler error occurs.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -443,7 +443,7 @@ below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The choice of newline convention does not affect the interpretation of
|
The choice of newline convention does not affect the interpretation of
|
||||||
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
the \n or \r escape sequences, nor does it affect what \R matches, which has
|
||||||
its own separate control.
|
its own separate control.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
|
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
|
||||||
|
@ -553,7 +553,7 @@ The memory used for a general context should be freed by calling:
|
||||||
The compile context
|
The compile context
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
A compile context is required if you want to change the default values of any
|
A compile context is required if you want to change the default values of any
|
||||||
of the following compile-time parameters:
|
of the following compile-time parameters:
|
||||||
<pre>
|
<pre>
|
||||||
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
||||||
|
@ -562,7 +562,7 @@ of the following compile-time parameters:
|
||||||
The compile time nested parentheses limit;
|
The compile time nested parentheses limit;
|
||||||
An external function for stack checking.
|
An external function for stack checking.
|
||||||
</pre>
|
</pre>
|
||||||
A compile context is also required if you are using custom memory management.
|
A compile context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
<i>pcre2_compile()</i>.
|
<i>pcre2_compile()</i>.
|
||||||
</P>
|
</P>
|
||||||
|
@ -579,33 +579,33 @@ A compile context is created, copied, and freed by the following functions:
|
||||||
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
|
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
A compile context is created with default values for its parameters. These can
|
A compile context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
|
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
|
||||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
|
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
|
||||||
ending sequence. The value of this parameter does not affect what is compiled;
|
ending sequence. The value of this parameter does not affect what is compiled;
|
||||||
it is just saved with the compiled pattern. The value is used by the JIT
|
it is just saved with the compiled pattern. The value is used by the JIT
|
||||||
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
|
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
|
||||||
<i>pcre2_dfa_match()</i>.
|
<i>pcre2_dfa_match()</i>.
|
||||||
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> const unsigned char *<i>tables</i>);</b>
|
<b> const unsigned char *<i>tables</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||||
argument is a general context. This function builds a set of character tables
|
argument is a general context. This function builds a set of character tables
|
||||||
in the current locale.
|
in the current locale.
|
||||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
This specifies which characters or character sequences are to be recognized as
|
This specifies which characters or character sequences are to be recognized as
|
||||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||||
</P>
|
</P>
|
||||||
|
@ -627,7 +627,7 @@ using up too much system stack when being compiled.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
There is at least one application that runs PCRE2 in threads with very limited
|
There is at least one application that runs PCRE2 in threads with very limited
|
||||||
system stack, where running out of stack is to be avoided at all costs. The
|
system stack, where running out of stack is to be avoided at all costs. The
|
||||||
parenthesis limit above cannot take account of how much stack is actually
|
parenthesis limit above cannot take account of how much stack is actually
|
||||||
available. For a finer control, you can supply a function that is called
|
available. For a finer control, you can supply a function that is called
|
||||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
||||||
|
@ -638,20 +638,20 @@ function should return zero if all is well, or non-zero to force an error.
|
||||||
The match context
|
The match context
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
A match context is required if you want to change the default values of any
|
A match context is required if you want to change the default values of any
|
||||||
of the following match-time parameters:
|
of the following match-time parameters:
|
||||||
<pre>
|
<pre>
|
||||||
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
What \R matches (Unicode newlines or CR, LF, CRLF only);
|
||||||
A callout function;
|
A callout function;
|
||||||
The limit for calling <i>match()</i>;
|
The limit for calling <i>match()</i>;
|
||||||
The limit for calling <i>match()</i> recursively;
|
The limit for calling <i>match()</i> recursively;
|
||||||
The newline character sequence;
|
The newline character sequence;
|
||||||
</pre>
|
</pre>
|
||||||
A match context is also required if you are using custom memory management.
|
A match context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
|
<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
|
||||||
Changing the newline value or what \R matches at match time disables the use
|
Changing the newline value or what \R matches at match time disables the use
|
||||||
of JIT via <b>pcre2_match()</b>.
|
of JIT via <b>pcre2_match()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A match context is created, copied, and freed by the following functions:
|
A match context is created, copied, and freed by the following functions:
|
||||||
|
@ -666,8 +666,8 @@ A match context is created, copied, and freed by the following functions:
|
||||||
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
|
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
A match context is created with default values for its parameters. These can
|
A match context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
||||||
|
@ -693,7 +693,7 @@ calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
|
||||||
imposed on the number of times this function is called during a match, which
|
imposed on the number of times this function is called during a match, which
|
||||||
has the effect of limiting the amount of backtracking that can take place. For
|
has the effect of limiting the amount of backtracking that can take place. For
|
||||||
patterns that are not anchored, the count restarts from zero for each position
|
patterns that are not anchored, the count restarts from zero for each position
|
||||||
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
|
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
|
||||||
which ignores it.
|
which ignores it.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -730,7 +730,7 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
|
||||||
Limiting the recursion depth limits the amount of system stack that can be
|
Limiting the recursion depth limits the amount of system stack that can be
|
||||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||||
and is ignored, when matching is done using JIT compiled code or by the
|
and is ignored, when matching is done using JIT compiled code or by the
|
||||||
<b>pcre2_dfa_match()</b> function.
|
<b>pcre2_dfa_match()</b> function.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -751,9 +751,9 @@ limit is set, less than the default.
|
||||||
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
This function sets up two additional custom memory management functions for use
|
This function sets up two additional custom memory management functions for use
|
||||||
by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
|
by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
|
||||||
backtracking data, instead of recursive function calls that use the system
|
backtracking data, instead of recursive function calls that use the system
|
||||||
stack. There is a discussion about PCRE2's stack usage in the
|
stack. There is a discussion about PCRE2's stack usage in the
|
||||||
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
||||||
documentation. See the
|
documentation. See the
|
||||||
|
@ -765,7 +765,7 @@ limited stacks. Because of the greater use of memory management,
|
||||||
general custom memory functions are provided so that special-purpose external
|
general custom memory functions are provided so that special-purpose external
|
||||||
code can be used for this case, because the memory blocks are all the same
|
code can be used for this case, because the memory blocks are all the same
|
||||||
size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
|
size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
|
||||||
so that they can be re-used when possible during the match. In the absence of
|
so that they can be re-used when possible during the match. In the absence of
|
||||||
these functions, the normal custom memory management functions are used, if
|
these functions, the normal custom memory management functions are used, if
|
||||||
supplied, otherwise the system functions.
|
supplied, otherwise the system functions.
|
||||||
</P>
|
</P>
|
||||||
|
@ -785,7 +785,7 @@ required. The second argument is a pointer to memory into which the information
|
||||||
is placed. If NULL is passed, the function returns the amount of memory that is
|
is placed. If NULL is passed, the function returns the amount of memory that is
|
||||||
needed for the requested information. For calls that return numerical values,
|
needed for the requested information. For calls that return numerical values,
|
||||||
the value is in bytes; when requesting these values, <i>where</i> should point
|
the value is in bytes; when requesting these values, <i>where</i> should point
|
||||||
to appropriately aligned memory. For calls that return strings, the required
|
to appropriately aligned memory. For calls that return strings, the required
|
||||||
length is given in code units, not counting the terminating zero.
|
length is given in code units, not counting the terminating zero.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -809,7 +809,7 @@ compiling is available; otherwise it is set to zero.
|
||||||
PCRE2_CONFIG_JITTARGET
|
PCRE2_CONFIG_JITTARGET
|
||||||
</pre>
|
</pre>
|
||||||
The <i>where</i> argument should point to a buffer that is at least 48 code
|
The <i>where</i> argument should point to a buffer that is at least 48 code
|
||||||
units long. (The exact length needed can be found by calling
|
units long. (The exact length needed can be found by calling
|
||||||
<b>pcre2_config()</b> with <i>where</i> set to NULL.) The buffer is filled with a
|
<b>pcre2_config()</b> with <i>where</i> set to NULL.) The buffer is filled with a
|
||||||
string that contains the name of the architecture for which the JIT compiler is
|
string that contains the name of the architecture for which the JIT compiler is
|
||||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
||||||
|
@ -820,9 +820,9 @@ the string, in code units, is returned.
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer that contains the number of bytes used for internal
|
The output is an integer that contains the number of bytes used for internal
|
||||||
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
||||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||||
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
|
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
|
||||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||||
internal linkages always use 4 bytes, so the configured value is not relevant.
|
internal linkages always use 4 bytes, so the configured value is not relevant.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -908,16 +908,16 @@ units) is returned.
|
||||||
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This function compiles a pattern, defined by a pointer to a string of code
|
This function compiles a pattern, defined by a pointer to a string of code
|
||||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||||
pointer to a block of memory that contains the compiled pattern and related
|
pointer to a block of memory that contains the compiled pattern and related
|
||||||
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
|
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
|
||||||
it is no longer needed.
|
it is no longer needed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
|
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
|
||||||
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
|
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
|
||||||
function that was used for the compile context.
|
function that was used for the compile context.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -927,7 +927,7 @@ options are described below. Some of them (in particular, those that are
|
||||||
compatible with Perl, but some others as well) can also be set and unset from
|
compatible with Perl, but some others as well) can also be set and unset from
|
||||||
within the pattern (see the detailed description in the
|
within the pattern (see the detailed description in the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
documentation).
|
documentation).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For those options that can be different in different parts of the pattern, the
|
For those options that can be different in different parts of the pattern, the
|
||||||
|
@ -936,7 +936,7 @@ compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
||||||
the time of matching as well as at compile time.
|
the time of matching as well as at compile time.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Other, less frequently required compile-time parameters (for example, the
|
Other, less frequently required compile-time parameters (for example, the
|
||||||
newline setting) can be provided in a compile context (as described
|
newline setting) can be provided in a compile context (as described
|
||||||
<a href="#compilecontext">above).</a>
|
<a href="#compilecontext">above).</a>
|
||||||
</P>
|
</P>
|
||||||
|
@ -962,10 +962,10 @@ This code fragment shows a typical straightforward call to
|
||||||
<pre>
|
<pre>
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
PCRE2_SIZE erroffset;
|
PCRE2_SIZE erroffset;
|
||||||
int errorcode;
|
int errorcode;
|
||||||
re = pcre2_compile(
|
re = pcre2_compile(
|
||||||
"^A.*Z", /* the pattern */
|
"^A.*Z", /* the pattern */
|
||||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||||
0, /* default options */
|
0, /* default options */
|
||||||
&errorcode, /* for error code */
|
&errorcode, /* for error code */
|
||||||
&erroffset, /* for error offset */
|
&erroffset, /* for error offset */
|
||||||
|
@ -984,14 +984,14 @@ Perl.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ALLOW_EMPTY_CLASS
|
PCRE2_ALLOW_EMPTY_CLASS
|
||||||
</pre>
|
</pre>
|
||||||
By default, for compatibility with Perl, a closing square bracket that
|
By default, for compatibility with Perl, a closing square bracket that
|
||||||
immediately follows an opening one is treated as a data character for the
|
immediately follows an opening one is treated as a data character for the
|
||||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||||
therefore contains no characters and so can never match.
|
therefore contains no characters and so can never match.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ALT_BSUX
|
PCRE2_ALT_BSUX
|
||||||
</pre>
|
</pre>
|
||||||
This option requests alternative handling of three escape sequences, which
|
This option requests alternative handling of three escape sequences, which
|
||||||
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1023,7 +1023,7 @@ documentation.
|
||||||
</pre>
|
</pre>
|
||||||
If this bit is set, letters in the pattern match both upper and lower case
|
If this bit is set, letters in the pattern match both upper and lower case
|
||||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||||
changed within a pattern by a (?i) option setting.
|
changed within a pattern by a (?i) option setting.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_DOLLAR_ENDONLY
|
PCRE2_DOLLAR_ENDONLY
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1076,7 +1076,7 @@ Which characters are interpreted as newlines can be specified by a setting in
|
||||||
the compile context that is passed to <b>pcre2_compile()</b> or by a special
|
the compile context that is passed to <b>pcre2_compile()</b> or by a special
|
||||||
sequence at the start of the pattern, as described in the section entitled
|
sequence at the start of the pattern, as described in the section entitled
|
||||||
<a href="pcre2pattern.html#newlines">"Newline conventions"</a>
|
<a href="pcre2pattern.html#newlines">"Newline conventions"</a>
|
||||||
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
|
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
|
||||||
built.
|
built.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_FIRSTLINE
|
PCRE2_FIRSTLINE
|
||||||
|
@ -1091,7 +1091,7 @@ If this option is set, a back reference to an unset subpattern group matches an
|
||||||
empty string (by default this causes the current matching alternative to fail).
|
empty string (by default this causes the current matching alternative to fail).
|
||||||
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
|
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
|
||||||
find an "a" in the subject), whereas it fails by default, for Perl
|
find an "a" in the subject), whereas it fails by default, for Perl
|
||||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||||
JavaScript).
|
JavaScript).
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_MULTILINE
|
PCRE2_MULTILINE
|
||||||
|
@ -1116,10 +1116,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
|
||||||
PCRE2_NEVER_UCP
|
PCRE2_NEVER_UCP
|
||||||
</pre>
|
</pre>
|
||||||
This option locks out the use of Unicode properties for handling \B, \b, \D,
|
This option locks out the use of Unicode properties for handling \B, \b, \D,
|
||||||
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
|
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
|
||||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||||
may be useful in applications that process patterns from external sources. The
|
may be useful in applications that process patterns from external sources. The
|
||||||
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_NEVER_UTF
|
PCRE2_NEVER_UTF
|
||||||
|
@ -1195,7 +1195,7 @@ pattern
|
||||||
(*MARK:A)(X|Y)
|
(*MARK:A)(X|Y)
|
||||||
</pre>
|
</pre>
|
||||||
The minimum length for a match is one character. If the subject is "ABC", there
|
The minimum length for a match is one character. If the subject is "ABC", there
|
||||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||||
string at the end of the subject does not take place, because PCRE2 knows that
|
string at the end of the subject does not take place, because PCRE2 knows that
|
||||||
the subject is now too short, and so the (*MARK) is never encountered. In this
|
the subject is now too short, and so the (*MARK) is never encountered. In this
|
||||||
case, the optimization does not affect the overall match result, which is still
|
case, the optimization does not affect the overall match result, which is still
|
||||||
|
@ -1211,7 +1211,7 @@ and
|
||||||
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
document.
|
document.
|
||||||
If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
|
If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
|
||||||
error code.
|
error code.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1391,9 +1391,9 @@ The possible values for the second argument are defined in <b>pcre2.h</b>, and
|
||||||
are as follows:
|
are as follows:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_ALLOPTIONS
|
PCRE2_INFO_ALLOPTIONS
|
||||||
PCRE2_INFO_ARGOPTIONS
|
PCRE2_INFO_ARGOPTIONS
|
||||||
</pre>
|
</pre>
|
||||||
Return a copy of the pattern's options. The third argument should point to a
|
Return a copy of the pattern's options. The third argument should point to a
|
||||||
<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
||||||
were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
|
were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
|
||||||
the compile options as modified by any top-level option settings at the start
|
the compile options as modified by any top-level option settings at the start
|
||||||
|
@ -1411,7 +1411,7 @@ alternatives begin with one of the following:
|
||||||
\G always
|
\G always
|
||||||
.* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
|
.* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
|
||||||
</pre>
|
</pre>
|
||||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||||
PCRE2_INFO_ALLOPTIONS.
|
PCRE2_INFO_ALLOPTIONS.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_BACKREFMAX
|
PCRE2_INFO_BACKREFMAX
|
||||||
|
@ -1499,7 +1499,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
|
||||||
</pre>
|
</pre>
|
||||||
Returns 1 if there is a rightmost literal code unit that must exist in any
|
Returns 1 if there is a rightmost literal code unit that must exist in any
|
||||||
matched string, other than at its start. The third argument should point to an
|
matched string, other than at its start. The third argument should point to an
|
||||||
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
|
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
|
||||||
returned, the code unit value itself can be retrieved using
|
returned, the code unit value itself can be retrieved using
|
||||||
PCRE2_INFO_LASTCODEUNIT.
|
PCRE2_INFO_LASTCODEUNIT.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1657,11 +1657,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
||||||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Information about successful and unsuccessful matches is placed in a match
|
Information about successful and unsuccessful matches is placed in a match
|
||||||
data block, which is an opaque structure that is accessed by function calls. In
|
data block, which is an opaque structure that is accessed by function calls. In
|
||||||
particular, the match data block contains a vector of offsets into the subject
|
particular, the match data block contains a vector of offsets into the subject
|
||||||
string that define the matched part of the subject and any substrings that were
|
string that define the matched part of the subject and any substrings that were
|
||||||
captured. This is known as the <i>ovector</i>.
|
captured. This is known as the <i>ovector</i>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
|
Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
|
||||||
|
@ -1676,12 +1676,12 @@ return the overall matched string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
|
For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
|
||||||
pointer to a compiled pattern. In this case the ovector is created to be
|
pointer to a compiled pattern. In this case the ovector is created to be
|
||||||
exactly the right size to hold all the substrings a pattern might capture.
|
exactly the right size to hold all the substrings a pattern might capture.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The second argument of both these functions is a pointer to a general context,
|
The second argument of both these functions is a pointer to a general context,
|
||||||
which can specify custom memory management for obtaining the memory for the
|
which can specify custom memory management for obtaining the memory for the
|
||||||
match data block. If you are not using custom memory management, pass NULL.
|
match data block. If you are not using custom memory management, pass NULL.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1728,8 +1728,8 @@ Here is an example of a simple call to <b>pcre2_match()</b>:
|
||||||
match_data, /* the match data block */
|
match_data, /* the match data block */
|
||||||
NULL); /* a match context; NULL means use defaults */
|
NULL); /* a match context; NULL means use defaults */
|
||||||
</pre>
|
</pre>
|
||||||
If the subject string is zero-terminated, the length can be given as
|
If the subject string is zero-terminated, the length can be given as
|
||||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||||
matching parameters are to be changed. For details, see the section on
|
matching parameters are to be changed. For details, see the section on
|
||||||
<a href="#matchcontext">the match context</a>
|
<a href="#matchcontext">the match context</a>
|
||||||
above.
|
above.
|
||||||
|
@ -1742,7 +1742,7 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
|
||||||
<i>subject</i>, a length in <i>length</i>, and a starting offset in
|
<i>subject</i>, a length in <i>length</i>, and a starting offset in
|
||||||
<i>startoffset</i>. The length and offset are in code units, not characters.
|
<i>startoffset</i>. The length and offset are in code units, not characters.
|
||||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||||
UTF processing is enabled.
|
UTF processing is enabled.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1752,7 +1752,7 @@ zero, the search for a match starts at the beginning of the subject, and this
|
||||||
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
||||||
must point to the start of a character, or to the end of the subject (in UTF-32
|
must point to the start of a character, or to the end of the subject (in UTF-32
|
||||||
mode, one code unit equals one character, so all offsets are valid). Like the
|
mode, one code unit equals one character, so all offsets are valid). Like the
|
||||||
pattern string, the subject may contain binary zeroes.
|
pattern string, the subject may contain binary zeroes.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A non-zero starting offset is useful when searching for another match in the
|
A non-zero starting offset is useful when searching for another match in the
|
||||||
|
@ -1814,7 +1814,7 @@ JIT matching is disabled and the normal interpretive code in
|
||||||
The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
|
The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
|
||||||
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
||||||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||||
matching time. Note that setting the option at match time disables JIT
|
matching time. Note that setting the option at match time disables JIT
|
||||||
matching.
|
matching.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_NOTBOL
|
PCRE2_NOTBOL
|
||||||
|
@ -1867,14 +1867,14 @@ and
|
||||||
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you know that your subject is valid, and you want to skip these checks for
|
If you know that your subject is valid, and you want to skip these checks for
|
||||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
||||||
<b>pcre2_match()</b>. You might want to do this for the second and subsequent
|
<b>pcre2_match()</b>. You might want to do this for the second and subsequent
|
||||||
calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
|
calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
|
||||||
matches in a single subject string.
|
matches in a single subject string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
||||||
|
@ -1908,9 +1908,9 @@ documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||||
standard convention for the operating system. The default can be overridden in
|
standard convention for the operating system. The default can be overridden in
|
||||||
either a
|
either a
|
||||||
<a href="#compilecontext">compile context</a>
|
<a href="#compilecontext">compile context</a>
|
||||||
or a
|
or a
|
||||||
<a href="#matchcontext">match context.</a>
|
<a href="#matchcontext">match context.</a>
|
||||||
|
@ -1953,7 +1953,7 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In general, a pattern matches a certain portion of the subject, and in
|
In general, a pattern matches a certain portion of the subject, and in
|
||||||
addition, further substrings from the subject may be picked out by
|
addition, further substrings from the subject may be picked out by
|
||||||
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
||||||
book, this is called "capturing" in what follows, and the phrase "capturing
|
book, this is called "capturing" in what follows, and the phrase "capturing
|
||||||
subpattern" is used for a fragment of a pattern that picks out a substring.
|
subpattern" is used for a fragment of a pattern that picks out a substring.
|
||||||
|
@ -1964,11 +1964,11 @@ pattern.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The overall matched string and any captured substrings are returned to the
|
The overall matched string and any captured substrings are returned to the
|
||||||
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
|
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
|
||||||
contained within the
|
contained within the
|
||||||
<a href="#matchdatablock">match data block.</a>
|
<a href="#matchdatablock">match data block.</a>
|
||||||
You can obtain direct access to the ovector by calling
|
You can obtain direct access to the ovector by calling
|
||||||
<b>pcre2_get_ovector_pointer()</b> to find its address, and
|
<b>pcre2_get_ovector_pointer()</b> to find its address, and
|
||||||
<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
|
<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
|
||||||
contains. Alternatively, you can use the auxiliary functions for accessing
|
contains. Alternatively, you can use the auxiliary functions for accessing
|
||||||
captured substrings
|
captured substrings
|
||||||
|
@ -2044,26 +2044,26 @@ Other information about the match
|
||||||
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
|
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In addition to the offsets in the ovector, other information about a match is
|
In addition to the offsets in the ovector, other information about a match is
|
||||||
retained in the match data block and can be retrieved by the above functions.
|
retained in the match data block and can be retrieved by the above functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
|
When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
|
||||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||||
match or a partial match, as well as after a successful one.
|
match or a partial match, as well as after a successful one.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The offset of the character at which the successful match started is
|
The offset of the character at which the successful match started is
|
||||||
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
|
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
|
||||||
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
|
||||||
however, the \K has no effect for a partial match.
|
however, the \K has no effect for a partial match.
|
||||||
<a name="errorlist"></a></P>
|
<a name="errorlist"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Error return values from <b>pcre2_match()</b>
|
Error return values from <b>pcre2_match()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||||
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
||||||
error codes are also returned by other functions, and are documented with them.
|
error codes are also returned by other functions, and are documented with them.
|
||||||
The codes are given names in the header file. If UTF checking is in force and
|
The codes are given names in the header file. If UTF checking is in force and
|
||||||
|
@ -2205,7 +2205,7 @@ argument is a pointer to the match data block, the second is the group number,
|
||||||
and the third is a pointer to a variable into which the length is placed.
|
and the third is a pointer to a variable into which the length is placed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
||||||
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
||||||
new memory, obtained using the same memory allocation function that was used
|
new memory, obtained using the same memory allocation function that was used
|
||||||
for the match data block. The first two arguments of these functions are a
|
for the match data block. The first two arguments of these functions are a
|
||||||
|
@ -2220,10 +2220,10 @@ This is updated to contain the actual number of code units used, excluding the
|
||||||
terminating zero.
|
terminating zero.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
|
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
|
||||||
to variables that are updated with a pointer to the new memory and the number
|
to variables that are updated with a pointer to the new memory and the number
|
||||||
of code units that comprise the substring, again excluding the terminating
|
of code units that comprise the substring, again excluding the terminating
|
||||||
zero. When the substring is no longer needed, the memory should be freed by
|
zero. When the substring is no longer needed, the memory should be freed by
|
||||||
calling <b>pcre2_substring_free()</b>.
|
calling <b>pcre2_substring_free()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2237,9 +2237,9 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NOSUBSTRING
|
PCRE2_ERROR_NOSUBSTRING
|
||||||
</pre>
|
</pre>
|
||||||
No substring with the given number was captured. This could be because there is
|
No substring with the given number was captured. This could be because there is
|
||||||
no capturing group of that number in the pattern, or because the group with
|
no capturing group of that number in the pattern, or because the group with
|
||||||
that number did not participate in the match, or because the ovector was too
|
that number did not participate in the match, or because the ovector was too
|
||||||
small to capture that group.
|
small to capture that group.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||||
|
@ -2253,7 +2253,7 @@ small to capture that group.
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_substring_list_get()</b> function extracts all available substrings
|
The <b>pcre2_substring_list_get()</b> function extracts all available substrings
|
||||||
and builds a list of pointers to them, and a second list that contains their
|
and builds a list of pointers to them, and a second list that contains their
|
||||||
lengths (in code units), excluding a terminating zero that is added to each of
|
lengths (in code units), excluding a terminating zero that is added to each of
|
||||||
them. All this is done in a single block of memory that is obtained using the
|
them. All this is done in a single block of memory that is obtained using the
|
||||||
same memory allocation function that was used to get the match data block.
|
same memory allocation function that was used to get the match data block.
|
||||||
</P>
|
</P>
|
||||||
|
@ -2265,7 +2265,7 @@ NULL pointer. The address of the list of lengths is returned via
|
||||||
therefore need the lengths, you may supply NULL as the <b>lengthsptr</b>
|
therefore need the lengths, you may supply NULL as the <b>lengthsptr</b>
|
||||||
argument to disable the creation of a list of lengths. The yield of the
|
argument to disable the creation of a list of lengths. The yield of the
|
||||||
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
||||||
could not be obtained. When the list is no longer needed, it should be freed by
|
could not be obtained. When the list is no longer needed, it should be freed by
|
||||||
calling <b>pcre2_substring_list_free()</b>.
|
calling <b>pcre2_substring_list_free()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2312,7 +2312,7 @@ name.
|
||||||
<P>
|
<P>
|
||||||
Given the number, you can extract the substring directly, or use one of the
|
Given the number, you can extract the substring directly, or use one of the
|
||||||
functions described in the previous section. For convenience, there are also
|
functions described in the previous section. For convenience, there are also
|
||||||
"byname" functions that correspond to the "bynumber" functions, the only
|
"byname" functions that correspond to the "bynumber" functions, the only
|
||||||
difference being that the second argument is a name instead of a number.
|
difference being that the second argument is a name instead of a number.
|
||||||
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
||||||
the behaviour may not be what you want (see the next section).
|
the behaviour may not be what you want (see the next section).
|
||||||
|
@ -2375,7 +2375,7 @@ numbers, and hence the captured data.
|
||||||
<P>
|
<P>
|
||||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||||
when it finds the first match, starting at a given point in the subject. If you
|
when it finds the first match, starting at a given point in the subject. If you
|
||||||
want to find all possible matches, or the longest possible match at a given
|
want to find all possible matches, or the longest possible match at a given
|
||||||
position, consider using the alternative matching function (see below) instead.
|
position, consider using the alternative matching function (see below) instead.
|
||||||
If you cannot use the alternative function, you can kludge it up by making use
|
If you cannot use the alternative function, you can kludge it up by making use
|
||||||
of the callout facility, which is described in the
|
of the callout facility, which is described in the
|
||||||
|
@ -2566,8 +2566,8 @@ fail, this error is given.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
|
||||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||||
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
|
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>
|
||||||
|
|
|
@ -88,11 +88,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
|
||||||
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
|
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
|
||||||
strings that are contained in vectors of 16-bit and 32-bit code units,
|
strings that are contained in vectors of 16-bit and 32-bit code units,
|
||||||
respectively. These can be interpreted either as single-unit characters or
|
respectively. These can be interpreted either as single-unit characters or
|
||||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||||
the following to the <b>configure</b> command:
|
the following to the <b>configure</b> command:
|
||||||
<pre>
|
<pre>
|
||||||
--enable-pcre16
|
--enable-pcre16
|
||||||
--enable-pcre32
|
--enable-pcre32
|
||||||
</pre>
|
</pre>
|
||||||
If you do not want the 8-bit library, add
|
If you do not want the 8-bit library, add
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -358,7 +358,7 @@ override this value by specifying a run-time option.
|
||||||
If you add one of
|
If you add one of
|
||||||
<pre>
|
<pre>
|
||||||
--enable-pcre2test-libreadline
|
--enable-pcre2test-libreadline
|
||||||
--enable-pcre2test-libedit
|
--enable-pcre2test-libedit
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
|
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
|
||||||
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
|
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
|
||||||
|
@ -376,8 +376,8 @@ unmodified distribution version of readline is in use), some extra
|
||||||
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
|
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
|
||||||
this:
|
this:
|
||||||
<pre>
|
<pre>
|
||||||
"Readline uses the termcap functions, but does not link with
|
"Readline uses the termcap functions, but does not link with
|
||||||
the termcap or curses library itself, allowing applications
|
the termcap or curses library itself, allowing applications
|
||||||
which link with readline the to choose an appropriate library."
|
which link with readline the to choose an appropriate library."
|
||||||
</pre>
|
</pre>
|
||||||
If your environment has not been set up so that an appropriate library is
|
If your environment has not been set up so that an appropriate library is
|
||||||
|
|
|
@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||||
incompatible with the original PCRE API.
|
incompatible with the original PCRE API.
|
||||||
|
|
||||||
There are actually three libraries, each supporting a different code unit
|
There are actually three libraries, each supporting a different code unit
|
||||||
width. This demonstration program uses the 8-bit library.
|
width. This demonstration program uses the 8-bit library.
|
||||||
|
|
||||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||||
|
@ -56,8 +56,8 @@ the following line. */
|
||||||
|
|
||||||
/* #define PCRE2_STATIC */
|
/* #define PCRE2_STATIC */
|
||||||
|
|
||||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||||
only one code unit width, it makes it possible to use generic function names
|
only one code unit width, it makes it possible to use generic function names
|
||||||
such as pcre2_compile(). */
|
such as pcre2_compile(). */
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
|
||||||
|
|
||||||
re = pcre2_compile(
|
re = pcre2_compile(
|
||||||
pattern, /* the pattern */
|
pattern, /* the pattern */
|
||||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||||
0, /* default options */
|
0, /* default options */
|
||||||
&errornumber, /* for error number */
|
&errornumber, /* for error number */
|
||||||
&erroroffset, /* for error offset */
|
&erroroffset, /* for error offset */
|
||||||
|
@ -151,9 +151,9 @@ re = pcre2_compile(
|
||||||
|
|
||||||
if (re == NULL)
|
if (re == NULL)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR buffer[256];
|
PCRE2_UCHAR buffer[256];
|
||||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||||
buffer);
|
buffer);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -197,7 +197,7 @@ if (rc < 0)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||||
stored. */
|
stored. */
|
||||||
|
|
||||||
ovector = pcre2_get_ovector_pointer(match_data);
|
ovector = pcre2_get_ovector_pointer(match_data);
|
||||||
|
@ -210,7 +210,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
||||||
* captured. *
|
* captured. *
|
||||||
*************************************************************************/
|
*************************************************************************/
|
||||||
|
|
||||||
/* The output vector wasn't big enough. This should not happen, because we used
|
/* The output vector wasn't big enough. This should not happen, because we used
|
||||||
pcre2_match_data_create_from_pattern() above. */
|
pcre2_match_data_create_from_pattern() above. */
|
||||||
|
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
|
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
&name_entry_size); /* where to put the answer */
|
&name_entry_size); /* where to put the answer */
|
||||||
|
|
||||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||||
and the substring itself. In the 8-bit library the number is held in two
|
and the substring itself. In the 8-bit library the number is held in two
|
||||||
bytes, most significant first. */
|
bytes, most significant first. */
|
||||||
|
|
||||||
tabptr = name_table;
|
tabptr = name_table;
|
||||||
|
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
|
|
||||||
if (!find_all) /* Check for -g */
|
if (!find_all) /* Check for -g */
|
||||||
{
|
{
|
||||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||||
return 0; /* Exit the program. */
|
return 0; /* Exit the program. */
|
||||||
}
|
}
|
||||||
|
@ -324,7 +324,7 @@ sequence. */
|
||||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||||
newline == PCRE2_NEWLINE_CRLF ||
|
newline == PCRE2_NEWLINE_CRLF ||
|
||||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
/* Loop for second and subsequent matches */
|
/* Loop for second and subsequent matches */
|
||||||
|
|
||||||
|
|
|
@ -71,10 +71,10 @@ performance, there is also a "fast path" API that is JIT-specific.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
|
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
|
||||||
<P>
|
<P>
|
||||||
To make use of the JIT support in the simplest way, all you have to do is to
|
To make use of the JIT support in the simplest way, all you have to do is to
|
||||||
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
|
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
|
||||||
<b>pcre2_compile()</b>. This function has two arguments: the first is the
|
<b>pcre2_compile()</b>. This function has two arguments: the first is the
|
||||||
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
|
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
|
||||||
second is a set of option bits, which must include at least one of
|
second is a set of option bits, which must include at least one of
|
||||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||||
</P>
|
</P>
|
||||||
|
@ -239,7 +239,7 @@ non-default JIT stacks might operate:
|
||||||
</pre>
|
</pre>
|
||||||
All the functions described in this section do nothing if JIT is not available,
|
All the functions described in this section do nothing if JIT is not available,
|
||||||
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
|
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
|
||||||
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
|
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
|
||||||
processed by <b>pcre2_jit_compile()</b>.
|
processed by <b>pcre2_jit_compile()</b>.
|
||||||
<a name="stackfaq"></a></P>
|
<a name="stackfaq"></a></P>
|
||||||
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
|
||||||
|
@ -328,18 +328,18 @@ callback.
|
||||||
<pre>
|
<pre>
|
||||||
int rc;
|
int rc;
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
pcre2_match_data *match_data;
|
pcre2_match_data *match_data;
|
||||||
pcre2_jit_stack *jit_stack;
|
pcre2_jit_stack *jit_stack;
|
||||||
|
|
||||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||||
&errornumber, &erroffset, NULL);
|
&errornumber, &erroffset, NULL);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
/* Check for error (NULL) */
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
||||||
/* Check results */
|
/* Check results */
|
||||||
pcre2_free(re);
|
pcre2_free(re);
|
||||||
|
|
|
@ -89,15 +89,15 @@ empty string at the end of the subject.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When a partial match is returned, the first two elements in the ovector point
|
When a partial match is returned, the first two elements in the ovector point
|
||||||
to the portion of the subject that was matched. The appearance of \K in the
|
to the portion of the subject that was matched. The appearance of \K in the
|
||||||
pattern has no effect for a partial match. Consider this pattern:
|
pattern has no effect for a partial match. Consider this pattern:
|
||||||
<pre>
|
<pre>
|
||||||
/abc\K123/
|
/abc\K123/
|
||||||
</pre>
|
</pre>
|
||||||
If it is matched against "456abc123xyz" the result is a complete match, and the
|
If it is matched against "456abc123xyz" the result is a complete match, and the
|
||||||
ovector defines the matched string as "123", because \K resets the "start of
|
ovector defines the matched string as "123", because \K resets the "start of
|
||||||
match" point. However, if a partial match is requested and the subject string
|
match" point. However, if a partial match is requested and the subject string
|
||||||
is "456abc12", a partial match is found for the string "abc12", because all
|
is "456abc12", a partial match is found for the string "abc12", because all
|
||||||
these characters are needed for a subsequent re-match with additional
|
these characters are needed for a subsequent re-match with additional
|
||||||
characters.
|
characters.
|
||||||
</P>
|
</P>
|
||||||
|
@ -343,14 +343,14 @@ same point as before.
|
||||||
For example, if the pattern "(?<=123)abc" is partially matched against the
|
For example, if the pattern "(?<=123)abc" is partially matched against the
|
||||||
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
||||||
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
||||||
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
|
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
|
||||||
displays a partial match, it indicates the lookbehind characters with '<'
|
displays a partial match, it indicates the lookbehind characters with '<'
|
||||||
characters:
|
characters:
|
||||||
<pre>
|
<pre>
|
||||||
re> "(?<=123)abc"
|
re> "(?<=123)abc"
|
||||||
data> xx123ab\=ph
|
data> xx123ab\=ph
|
||||||
Partial match: 123ab
|
Partial match: 123ab
|
||||||
<<<
|
<<<
|
||||||
</PRE>
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -145,7 +145,7 @@ Unicode newline sequence. The
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
page has
|
page has
|
||||||
<a href="pcre2api.html#newlines">further discussion</a>
|
<a href="pcre2api.html#newlines">further discussion</a>
|
||||||
about newlines, and shows how to set the newline convention when calling
|
about newlines, and shows how to set the newline convention when calling
|
||||||
<b>pcre2_compile()</b>.
|
<b>pcre2_compile()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -218,7 +218,7 @@ corresponding characters in the subject. As a trivial example, the pattern
|
||||||
</pre>
|
</pre>
|
||||||
matches a portion of a subject string that is identical to itself. When
|
matches a portion of a subject string that is identical to itself. When
|
||||||
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
||||||
independently of case.
|
independently of case.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The power of regular expressions comes from the ability to include alternatives
|
The power of regular expressions comes from the ability to include alternatives
|
||||||
|
@ -1191,8 +1191,8 @@ An opening square bracket introduces a character class, terminated by a closing
|
||||||
square bracket. A closing square bracket on its own is not special by default.
|
square bracket. A closing square bracket on its own is not special by default.
|
||||||
If a closing square bracket is required as a member of the class, it should be
|
If a closing square bracket is required as a member of the class, it should be
|
||||||
the first data character in the class (after an initial circumflex, if present)
|
the first data character in the class (after an initial circumflex, if present)
|
||||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||||
square bracket at the start does end the (empty) class.
|
square bracket at the start does end the (empty) class.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1216,7 +1216,7 @@ string.
|
||||||
When caseless matching is set, any letters in a class represent both their
|
When caseless matching is set, any letters in a class represent both their
|
||||||
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
||||||
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
||||||
caseful version would.
|
caseful version would.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Characters that might indicate line breaks are never treated in any special way
|
Characters that might indicate line breaks are never treated in any special way
|
||||||
|
@ -1341,7 +1341,7 @@ classes by other sequences, as follows:
|
||||||
[:alnum:] becomes \p{Xan}
|
[:alnum:] becomes \p{Xan}
|
||||||
[:alpha:] becomes \p{L}
|
[:alpha:] becomes \p{L}
|
||||||
[:blank:] becomes \h
|
[:blank:] becomes \h
|
||||||
[:cntrl:] becomes \p{Cc}
|
[:cntrl:] becomes \p{Cc}
|
||||||
[:digit:] becomes \p{Nd}
|
[:digit:] becomes \p{Nd}
|
||||||
[:lower:] becomes \p{Ll}
|
[:lower:] becomes \p{Ll}
|
||||||
[:space:] becomes \p{Xps}
|
[:space:] becomes \p{Xps}
|
||||||
|
@ -1490,7 +1490,7 @@ match "cataract", "erpillar" or an empty string.
|
||||||
<br>
|
<br>
|
||||||
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
||||||
the whole pattern matches, the portion of the subject string that matched the
|
the whole pattern matches, the portion of the subject string that matched the
|
||||||
subpattern is passed back to the caller, separately from the portion that
|
subpattern is passed back to the caller, separately from the portion that
|
||||||
matched the whole pattern. (This applies only to the traditional matching
|
matched the whole pattern. (This applies only to the traditional matching
|
||||||
function; the DFA matching function does not support capturing.)
|
function; the DFA matching function does not support capturing.)
|
||||||
</P>
|
</P>
|
||||||
|
@ -1908,7 +1908,7 @@ at release 5.10.
|
||||||
PCRE2 has an optimization that automatically "possessifies" certain simple
|
PCRE2 has an optimization that automatically "possessifies" certain simple
|
||||||
pattern constructs. For example, the sequence A+B is treated as A++B because
|
pattern constructs. For example, the sequence A+B is treated as A++B because
|
||||||
there is no point in backtracking into a sequence of A's when B must follow.
|
there is no point in backtracking into a sequence of A's when B must follow.
|
||||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||||
the pattern with (*NO_AUTO_POSSESS).
|
the pattern with (*NO_AUTO_POSSESS).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2216,7 +2216,7 @@ if the pattern is written as
|
||||||
<pre>
|
<pre>
|
||||||
^.*+(?<=abcd)
|
^.*+(?<=abcd)
|
||||||
</pre>
|
</pre>
|
||||||
there can be no backtracking for the .*+ item because of the possessive
|
there can be no backtracking for the .*+ item because of the possessive
|
||||||
quantifier; it can match only the entire string. The subsequent lookbehind
|
quantifier; it can match only the entire string. The subsequent lookbehind
|
||||||
assertion does a single test on the last four characters. If it fails, the
|
assertion does a single test on the last four characters. If it fails, the
|
||||||
match fails immediately. For long strings, this approach makes a significant
|
match fails immediately. For long strings, this approach makes a significant
|
||||||
|
@ -2720,8 +2720,8 @@ same pair of parentheses when there is a repetition.
|
||||||
<P>
|
<P>
|
||||||
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
||||||
code. The feature is called "callout". The caller of PCRE2 provides an external
|
code. The feature is called "callout". The caller of PCRE2 provides an external
|
||||||
function by putting its entry point in a match context using the function
|
function by putting its entry point in a match context using the function
|
||||||
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
|
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
|
||||||
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
|
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
|
||||||
point is set to NULL, callouts are disabled.
|
point is set to NULL, callouts are disabled.
|
||||||
</P>
|
</P>
|
||||||
|
@ -2961,7 +2961,7 @@ output from <b>pcre2test</b>:
|
||||||
re> /(*COMMIT)abc/
|
re> /(*COMMIT)abc/
|
||||||
data> xyzabc
|
data> xyzabc
|
||||||
0: abc
|
0: abc
|
||||||
data>
|
data>
|
||||||
re> /(*COMMIT)abc/no_start_optimize
|
re> /(*COMMIT)abc/no_start_optimize
|
||||||
data> xyzabc
|
data> xyzabc
|
||||||
No match
|
No match
|
||||||
|
@ -2989,7 +2989,7 @@ as (*COMMIT).
|
||||||
<P>
|
<P>
|
||||||
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
||||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||||
ignoring those set by (*PRUNE) or (*THEN).
|
ignoring those set by (*PRUNE) or (*THEN).
|
||||||
<pre>
|
<pre>
|
||||||
(*SKIP)
|
(*SKIP)
|
||||||
|
@ -3041,7 +3041,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
|
||||||
<P>
|
<P>
|
||||||
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
||||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||||
ignoring those set by (*PRUNE) and (*THEN).
|
ignoring those set by (*PRUNE) and (*THEN).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -103,17 +103,17 @@ PCRE2 to use heap memory instead of stack for remembering back-up points when
|
||||||
of how to do this are given in the
|
of how to do this are given in the
|
||||||
<a href="pcre2build.html"><b>pcre2build</b></a>
|
<a href="pcre2build.html"><b>pcre2build</b></a>
|
||||||
documentation. When built in this way, instead of using the stack, PCRE2
|
documentation. When built in this way, instead of using the stack, PCRE2
|
||||||
gets memory for remembering backup points from the heap. By default, the memory
|
gets memory for remembering backup points from the heap. By default, the memory
|
||||||
is obtained by calling the system <b>malloc()</b> function, but you can arrange
|
is obtained by calling the system <b>malloc()</b> function, but you can arrange
|
||||||
to supply your own memory management function. For details, see the section
|
to supply your own memory management function. For details, see the section
|
||||||
entitled
|
entitled
|
||||||
<a href="pcre2api.html#matchcontext">"The match context"</a>
|
<a href="pcre2api.html#matchcontext">"The match context"</a>
|
||||||
in the
|
in the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation. Since the block sizes are always the same, it may be possible to
|
documentation. Since the block sizes are always the same, it may be possible to
|
||||||
implement a customized memory handler that is more efficient than the standard
|
implement a customized memory handler that is more efficient than the standard
|
||||||
function. The memory blocks obtained for this purpose are retained and re-used
|
function. The memory blocks obtained for this purpose are retained and re-used
|
||||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||||
before it exits.
|
before it exits.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
|
|
@ -414,7 +414,7 @@ appear.
|
||||||
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||||
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||||
(*UTF) set appropriate UTF mode for the library in use
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
|
|
|
@ -476,7 +476,7 @@ about the pattern:
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
hex pattern is coded in hexadecimal
|
hex pattern is coded in hexadecimal
|
||||||
jit[=<number>] use JIT
|
jit[=<number>] use JIT
|
||||||
jitverify verify JIT use
|
jitverify verify JIT use
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
newline=<type> set newline type
|
newline=<type> set newline type
|
||||||
|
@ -565,7 +565,7 @@ number in the range 0 to 7:
|
||||||
7 all three modes
|
7 all three modes
|
||||||
</pre>
|
</pre>
|
||||||
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
||||||
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
|
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
|
||||||
for the appropriate type of match, except when incompatible run-time options
|
for the appropriate type of match, except when incompatible run-time options
|
||||||
are specified. For more details, see the
|
are specified. For more details, see the
|
||||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||||
|
@ -710,7 +710,7 @@ for a description of their effects.
|
||||||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||||
</pre>
|
</pre>
|
||||||
The partial matching modifiers are provided with abbreviations because they
|
The partial matching modifiers are provided with abbreviations because they
|
||||||
appear frequently in tests.
|
appear frequently in tests.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -892,8 +892,8 @@ until it finds the minimum values for each parameter that allow
|
||||||
<b>pcre2_match()</b> to complete without error.
|
<b>pcre2_match()</b> to complete without error.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||||
warning message).
|
warning message).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -939,10 +939,10 @@ appears, though of course it can also be used to set a default in a
|
||||||
available for storing matching information. The default is 15.
|
available for storing matching information. The default is 15.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
At least one pair of offsets is always created by
|
At least one pair of offsets is always created by
|
||||||
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
|
||||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||||
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
|
||||||
vector.
|
vector.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
|
||||||
|
|
|
@ -67,7 +67,7 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a
|
||||||
single code unit.
|
single code unit.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The escape sequence \C can be used to match a single code unit, in a UTF mode,
|
The escape sequence \C can be used to match a single code unit, in a UTF mode,
|
||||||
but its use can lead to some strange effects because it breaks up multi-unit
|
but its use can lead to some strange effects because it breaks up multi-unit
|
||||||
characters (see the description of \C in the
|
characters (see the description of \C in the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
|
@ -114,8 +114,8 @@ VALIDITY OF UTF STRINGS
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||||
are (by default) checked for validity on entry to the relevant functions.
|
are (by default) checked for validity on entry to the relevant functions.
|
||||||
If an invalid UTF string is passed, an error return is given.
|
If an invalid UTF string is passed, an error return is given.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
UTF-16 and UTF-32 strings can indicate their endianness by special code known
|
UTF-16 and UTF-32 strings can indicate their endianness by special code known
|
||||||
|
|
|
@ -23,11 +23,11 @@ of Unicode in use can be discovered by running
|
||||||
.sp
|
.sp
|
||||||
pcre2test -C
|
pcre2test -C
|
||||||
.P
|
.P
|
||||||
The three libraries contain identical sets of functions, with names ending in
|
The three libraries contain identical sets of functions, with names ending in
|
||||||
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
|
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
|
||||||
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
|
||||||
one code unit width can be written using generic names such as
|
one code unit width can be written using generic names such as
|
||||||
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
|
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
|
||||||
the case.
|
the case.
|
||||||
.P
|
.P
|
||||||
In addition to the Perl-compatible matching function, PCRE2 contains an
|
In addition to the Perl-compatible matching function, PCRE2 contains an
|
||||||
|
|
|
@ -158,8 +158,8 @@ REVISION
|
||||||
Last updated: 28 September 2014
|
Last updated: 28 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -2529,8 +2529,8 @@ REVISION
|
||||||
Last updated: 16 October 2014
|
Last updated: 16 October 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -2981,8 +2981,8 @@ REVISION
|
||||||
Last updated: 28 September 2014
|
Last updated: 28 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3217,8 +3217,8 @@ REVISION
|
||||||
Last updated: 19 October 2014
|
Last updated: 19 October 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3403,8 +3403,8 @@ REVISION
|
||||||
Last updated: 28 September 2014
|
Last updated: 28 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3758,8 +3758,8 @@ REVISION
|
||||||
Last updated: 29 September 2014
|
Last updated: 29 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3826,8 +3826,8 @@ REVISION
|
||||||
Last updated: 29 September 2014
|
Last updated: 29 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4045,8 +4045,8 @@ REVISION
|
||||||
Last updated: 29 September 2014
|
Last updated: 29 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4485,8 +4485,8 @@ REVISION
|
||||||
Last updated: 14 October 2014
|
Last updated: 14 October 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4711,5 +4711,5 @@ REVISION
|
||||||
Last updated: 16 September 2014
|
Last updated: 16 September 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
206
doc/pcre2api.3
206
doc/pcre2api.3
|
@ -250,7 +250,7 @@ to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is
|
||||||
unknown should also use the real function names. (Unfortunately, it is not
|
unknown should also use the real function names. (Unfortunately, it is not
|
||||||
possible in C code to save and restore the value of a macro.)
|
possible in C code to save and restore the value of a macro.)
|
||||||
.P
|
.P
|
||||||
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
|
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
|
||||||
compiler error occurs.
|
compiler error occurs.
|
||||||
.P
|
.P
|
||||||
When using multiple libraries in an application, you must take care when
|
When using multiple libraries in an application, you must take care when
|
||||||
|
@ -392,7 +392,7 @@ section on \fBpcre2_match()\fP options
|
||||||
below.
|
below.
|
||||||
.P
|
.P
|
||||||
The choice of newline convention does not affect the interpretation of
|
The choice of newline convention does not affect the interpretation of
|
||||||
the \en or \er escape sequences, nor does it affect what \eR matches, which has
|
the \en or \er escape sequences, nor does it affect what \eR matches, which has
|
||||||
its own separate control.
|
its own separate control.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -509,7 +509,7 @@ The memory used for a general context should be freed by calling:
|
||||||
.SS "The compile context"
|
.SS "The compile context"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
A compile context is required if you want to change the default values of any
|
A compile context is required if you want to change the default values of any
|
||||||
of the following compile-time parameters:
|
of the following compile-time parameters:
|
||||||
.sp
|
.sp
|
||||||
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
||||||
|
@ -518,7 +518,7 @@ of the following compile-time parameters:
|
||||||
The compile time nested parentheses limit;
|
The compile time nested parentheses limit;
|
||||||
An external function for stack checking.
|
An external function for stack checking.
|
||||||
.sp
|
.sp
|
||||||
A compile context is also required if you are using custom memory management.
|
A compile context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
\fIpcre2_compile()\fP.
|
\fIpcre2_compile()\fP.
|
||||||
.P
|
.P
|
||||||
|
@ -534,8 +534,8 @@ A compile context is created, copied, and freed by the following functions:
|
||||||
.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP);
|
.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP);
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
A compile context is created with default values for its parameters. These can
|
A compile context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
@ -543,11 +543,11 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
.B " uint32_t \fIvalue\fP);"
|
.B " uint32_t \fIvalue\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
|
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
|
||||||
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
|
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
|
||||||
ending sequence. The value of this parameter does not affect what is compiled;
|
ending sequence. The value of this parameter does not affect what is compiled;
|
||||||
it is just saved with the compiled pattern. The value is used by the JIT
|
it is just saved with the compiled pattern. The value is used by the JIT
|
||||||
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
||||||
\fIpcre2_dfa_match()\fP.
|
\fIpcre2_dfa_match()\fP.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
@ -555,7 +555,7 @@ compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
|
||||||
.B " const unsigned char *\fItables\fP);"
|
.B " const unsigned char *\fItables\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
|
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
|
||||||
argument is a general context. This function builds a set of character tables
|
argument is a general context. This function builds a set of character tables
|
||||||
in the current locale.
|
in the current locale.
|
||||||
.sp
|
.sp
|
||||||
|
@ -564,9 +564,9 @@ in the current locale.
|
||||||
.B " uint32_t \fIvalue\fP);"
|
.B " uint32_t \fIvalue\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
This specifies which characters or character sequences are to be recognized as
|
This specifies which characters or character sequences are to be recognized as
|
||||||
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
|
||||||
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
|
||||||
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
|
||||||
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
|
||||||
.P
|
.P
|
||||||
|
@ -591,7 +591,7 @@ using up too much system stack when being compiled.
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
There is at least one application that runs PCRE2 in threads with very limited
|
There is at least one application that runs PCRE2 in threads with very limited
|
||||||
system stack, where running out of stack is to be avoided at all costs. The
|
system stack, where running out of stack is to be avoided at all costs. The
|
||||||
parenthesis limit above cannot take account of how much stack is actually
|
parenthesis limit above cannot take account of how much stack is actually
|
||||||
available. For a finer control, you can supply a function that is called
|
available. For a finer control, you can supply a function that is called
|
||||||
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
||||||
|
@ -603,20 +603,20 @@ function should return zero if all is well, or non-zero to force an error.
|
||||||
.SS "The match context"
|
.SS "The match context"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
A match context is required if you want to change the default values of any
|
A match context is required if you want to change the default values of any
|
||||||
of the following match-time parameters:
|
of the following match-time parameters:
|
||||||
.sp
|
.sp
|
||||||
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
What \eR matches (Unicode newlines or CR, LF, CRLF only);
|
||||||
A callout function;
|
A callout function;
|
||||||
The limit for calling \fImatch()\fP;
|
The limit for calling \fImatch()\fP;
|
||||||
The limit for calling \fImatch()\fP recursively;
|
The limit for calling \fImatch()\fP recursively;
|
||||||
The newline character sequence;
|
The newline character sequence;
|
||||||
.sp
|
.sp
|
||||||
A match context is also required if you are using custom memory management.
|
A match context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP.
|
\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP.
|
||||||
Changing the newline value or what \eR matches at match time disables the use
|
Changing the newline value or what \eR matches at match time disables the use
|
||||||
of JIT via \fBpcre2_match()\fP.
|
of JIT via \fBpcre2_match()\fP.
|
||||||
.P
|
.P
|
||||||
A match context is created, copied, and freed by the following functions:
|
A match context is created, copied, and freed by the following functions:
|
||||||
.sp
|
.sp
|
||||||
|
@ -630,8 +630,8 @@ A match context is created, copied, and freed by the following functions:
|
||||||
.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP);
|
.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP);
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
A match context is created with default values for its parameters. These can
|
A match context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
@ -662,7 +662,7 @@ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
|
||||||
imposed on the number of times this function is called during a match, which
|
imposed on the number of times this function is called during a match, which
|
||||||
has the effect of limiting the amount of backtracking that can take place. For
|
has the effect of limiting the amount of backtracking that can take place. For
|
||||||
patterns that are not anchored, the count restarts from zero for each position
|
patterns that are not anchored, the count restarts from zero for each position
|
||||||
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
|
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
|
||||||
which ignores it.
|
which ignores it.
|
||||||
.P
|
.P
|
||||||
When \fBpcre2_match()\fP is called with a pattern that was successfully studied
|
When \fBpcre2_match()\fP is called with a pattern that was successfully studied
|
||||||
|
@ -698,7 +698,7 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP.
|
||||||
Limiting the recursion depth limits the amount of system stack that can be
|
Limiting the recursion depth limits the amount of system stack that can be
|
||||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||||
and is ignored, when matching is done using JIT compiled code or by the
|
and is ignored, when matching is done using JIT compiled code or by the
|
||||||
\fBpcre2_dfa_match()\fP function.
|
\fBpcre2_dfa_match()\fP function.
|
||||||
.P
|
.P
|
||||||
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
|
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
|
||||||
|
@ -720,9 +720,9 @@ limit is set, less than the default.
|
||||||
.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
|
.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
This function sets up two additional custom memory management functions for use
|
This function sets up two additional custom memory management functions for use
|
||||||
by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering
|
by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering
|
||||||
backtracking data, instead of recursive function calls that use the system
|
backtracking data, instead of recursive function calls that use the system
|
||||||
stack. There is a discussion about PCRE2's stack usage in the
|
stack. There is a discussion about PCRE2's stack usage in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2stack\fP
|
\fBpcre2stack\fP
|
||||||
|
@ -738,7 +738,7 @@ limited stacks. Because of the greater use of memory management,
|
||||||
general custom memory functions are provided so that special-purpose external
|
general custom memory functions are provided so that special-purpose external
|
||||||
code can be used for this case, because the memory blocks are all the same
|
code can be used for this case, because the memory blocks are all the same
|
||||||
size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit
|
size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit
|
||||||
so that they can be re-used when possible during the match. In the absence of
|
so that they can be re-used when possible during the match. In the absence of
|
||||||
these functions, the normal custom memory management functions are used, if
|
these functions, the normal custom memory management functions are used, if
|
||||||
supplied, otherwise the system functions.
|
supplied, otherwise the system functions.
|
||||||
.
|
.
|
||||||
|
@ -760,7 +760,7 @@ required. The second argument is a pointer to memory into which the information
|
||||||
is placed. If NULL is passed, the function returns the amount of memory that is
|
is placed. If NULL is passed, the function returns the amount of memory that is
|
||||||
needed for the requested information. For calls that return numerical values,
|
needed for the requested information. For calls that return numerical values,
|
||||||
the value is in bytes; when requesting these values, \fIwhere\fP should point
|
the value is in bytes; when requesting these values, \fIwhere\fP should point
|
||||||
to appropriately aligned memory. For calls that return strings, the required
|
to appropriately aligned memory. For calls that return strings, the required
|
||||||
length is given in code units, not counting the terminating zero.
|
length is given in code units, not counting the terminating zero.
|
||||||
.P
|
.P
|
||||||
When requesting information, the returned value from \fBpcre2_config()\fP is
|
When requesting information, the returned value from \fBpcre2_config()\fP is
|
||||||
|
@ -783,7 +783,7 @@ compiling is available; otherwise it is set to zero.
|
||||||
PCRE2_CONFIG_JITTARGET
|
PCRE2_CONFIG_JITTARGET
|
||||||
.sp
|
.sp
|
||||||
The \fIwhere\fP argument should point to a buffer that is at least 48 code
|
The \fIwhere\fP argument should point to a buffer that is at least 48 code
|
||||||
units long. (The exact length needed can be found by calling
|
units long. (The exact length needed can be found by calling
|
||||||
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a
|
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a
|
||||||
string that contains the name of the architecture for which the JIT compiler is
|
string that contains the name of the architecture for which the JIT compiler is
|
||||||
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
|
||||||
|
@ -794,9 +794,9 @@ the string, in code units, is returned.
|
||||||
.sp
|
.sp
|
||||||
The output is an integer that contains the number of bytes used for internal
|
The output is an integer that contains the number of bytes used for internal
|
||||||
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
||||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
||||||
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
|
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
|
||||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
||||||
internal linkages always use 4 bytes, so the configured value is not relevant.
|
internal linkages always use 4 bytes, so the configured value is not relevant.
|
||||||
.P
|
.P
|
||||||
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
|
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
|
||||||
|
@ -820,7 +820,7 @@ that is recognized as meaning "newline". The values are:
|
||||||
3 Carriage return, linefeed (CRLF)
|
3 Carriage return, linefeed (CRLF)
|
||||||
4 Any Unicode line ending
|
4 Any Unicode line ending
|
||||||
5 Any of CR, LF, or CRLF
|
5 Any of CR, LF, or CRLF
|
||||||
.sp
|
.sp
|
||||||
The default should normally correspond to the standard sequence for your
|
The default should normally correspond to the standard sequence for your
|
||||||
operating system.
|
operating system.
|
||||||
.sp
|
.sp
|
||||||
|
@ -849,7 +849,7 @@ compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
|
||||||
heap instead of recursive function calls.
|
heap instead of recursive function calls.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
.sp
|
.sp
|
||||||
The \fIwhere\fP argument should point to a buffer that is at least 24 code
|
The \fIwhere\fP argument should point to a buffer that is at least 24 code
|
||||||
units long. (The exact length needed can be found by calling
|
units long. (The exact length needed can be found by calling
|
||||||
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled
|
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled
|
||||||
|
@ -884,15 +884,15 @@ units) is returned.
|
||||||
.B pcre2_code_free(pcre2_code *\fIcode\fP);
|
.B pcre2_code_free(pcre2_code *\fIcode\fP);
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
This function compiles a pattern, defined by a pointer to a string of code
|
This function compiles a pattern, defined by a pointer to a string of code
|
||||||
units and a length, into an internal form. If the pattern is zero-terminated,
|
units and a length, into an internal form. If the pattern is zero-terminated,
|
||||||
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
|
||||||
pointer to a block of memory that contains the compiled pattern and related
|
pointer to a block of memory that contains the compiled pattern and related
|
||||||
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
|
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
|
||||||
it is no longer needed.
|
it is no longer needed.
|
||||||
.P
|
.P
|
||||||
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
|
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
|
||||||
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
|
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
|
||||||
function that was used for the compile context.
|
function that was used for the compile context.
|
||||||
.P
|
.P
|
||||||
The \fIoptions\fP argument contains various bit settings that affect the
|
The \fIoptions\fP argument contains various bit settings that affect the
|
||||||
|
@ -903,14 +903,14 @@ within the pattern (see the detailed description in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2pattern\fP
|
\fBpcre2pattern\fP
|
||||||
.\"
|
.\"
|
||||||
documentation).
|
documentation).
|
||||||
.P
|
.P
|
||||||
For those options that can be different in different parts of the pattern, the
|
For those options that can be different in different parts of the pattern, the
|
||||||
contents of the \fIoptions\fP argument specifies their settings at the start of
|
contents of the \fIoptions\fP argument specifies their settings at the start of
|
||||||
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
|
||||||
the time of matching as well as at compile time.
|
the time of matching as well as at compile time.
|
||||||
.P
|
.P
|
||||||
Other, less frequently required compile-time parameters (for example, the
|
Other, less frequently required compile-time parameters (for example, the
|
||||||
newline setting) can be provided in a compile context (as described
|
newline setting) can be provided in a compile context (as described
|
||||||
.\" HTML <a href="#compilecontext">
|
.\" HTML <a href="#compilecontext">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
|
@ -936,10 +936,10 @@ This code fragment shows a typical straightforward call to
|
||||||
.sp
|
.sp
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
PCRE2_SIZE erroffset;
|
PCRE2_SIZE erroffset;
|
||||||
int errorcode;
|
int errorcode;
|
||||||
re = pcre2_compile(
|
re = pcre2_compile(
|
||||||
"^A.*Z", /* the pattern */
|
"^A.*Z", /* the pattern */
|
||||||
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
|
||||||
0, /* default options */
|
0, /* default options */
|
||||||
&errorcode, /* for error code */
|
&errorcode, /* for error code */
|
||||||
&erroffset, /* for error offset */
|
&erroffset, /* for error offset */
|
||||||
|
@ -958,14 +958,14 @@ Perl.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ALLOW_EMPTY_CLASS
|
PCRE2_ALLOW_EMPTY_CLASS
|
||||||
.sp
|
.sp
|
||||||
By default, for compatibility with Perl, a closing square bracket that
|
By default, for compatibility with Perl, a closing square bracket that
|
||||||
immediately follows an opening one is treated as a data character for the
|
immediately follows an opening one is treated as a data character for the
|
||||||
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
|
||||||
therefore contains no characters and so can never match.
|
therefore contains no characters and so can never match.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ALT_BSUX
|
PCRE2_ALT_BSUX
|
||||||
.sp
|
.sp
|
||||||
This option requests alternative handling of three escape sequences, which
|
This option requests alternative handling of three escape sequences, which
|
||||||
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
|
||||||
.P
|
.P
|
||||||
(1) \eU matches an upper case "U" character; by default \eU causes a compile
|
(1) \eU matches an upper case "U" character; by default \eU causes a compile
|
||||||
|
@ -996,7 +996,7 @@ documentation.
|
||||||
.sp
|
.sp
|
||||||
If this bit is set, letters in the pattern match both upper and lower case
|
If this bit is set, letters in the pattern match both upper and lower case
|
||||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||||
changed within a pattern by a (?i) option setting.
|
changed within a pattern by a (?i) option setting.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_DOLLAR_ENDONLY
|
PCRE2_DOLLAR_ENDONLY
|
||||||
.sp
|
.sp
|
||||||
|
@ -1052,7 +1052,7 @@ sequence at the start of the pattern, as described in the section entitled
|
||||||
.\" </a>
|
.\" </a>
|
||||||
"Newline conventions"
|
"Newline conventions"
|
||||||
.\"
|
.\"
|
||||||
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
|
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
|
||||||
built.
|
built.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_FIRSTLINE
|
PCRE2_FIRSTLINE
|
||||||
|
@ -1067,7 +1067,7 @@ If this option is set, a back reference to an unset subpattern group matches an
|
||||||
empty string (by default this causes the current matching alternative to fail).
|
empty string (by default this causes the current matching alternative to fail).
|
||||||
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
|
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
|
||||||
find an "a" in the subject), whereas it fails by default, for Perl
|
find an "a" in the subject), whereas it fails by default, for Perl
|
||||||
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
|
||||||
JavaScript).
|
JavaScript).
|
||||||
.sp
|
.sp
|
||||||
PCRE2_MULTILINE
|
PCRE2_MULTILINE
|
||||||
|
@ -1091,10 +1091,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
|
||||||
PCRE2_NEVER_UCP
|
PCRE2_NEVER_UCP
|
||||||
.sp
|
.sp
|
||||||
This option locks out the use of Unicode properties for handling \eB, \eb, \eD,
|
This option locks out the use of Unicode properties for handling \eB, \eb, \eD,
|
||||||
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
|
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
|
||||||
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
for the PCRE2_UCP option below. In particular, it prevents the creator of the
|
||||||
pattern from enabling this facility by starting the pattern with (*UCP). This
|
pattern from enabling this facility by starting the pattern with (*UCP). This
|
||||||
may be useful in applications that process patterns from external sources. The
|
may be useful in applications that process patterns from external sources. The
|
||||||
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_NEVER_UTF
|
PCRE2_NEVER_UTF
|
||||||
|
@ -1167,7 +1167,7 @@ pattern
|
||||||
(*MARK:A)(X|Y)
|
(*MARK:A)(X|Y)
|
||||||
.sp
|
.sp
|
||||||
The minimum length for a match is one character. If the subject is "ABC", there
|
The minimum length for a match is one character. If the subject is "ABC", there
|
||||||
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
|
||||||
string at the end of the subject does not take place, because PCRE2 knows that
|
string at the end of the subject does not take place, because PCRE2 knows that
|
||||||
the subject is now too short, and so the (*MARK) is never encountered. In this
|
the subject is now too short, and so the (*MARK) is never encountered. In this
|
||||||
case, the optimization does not affect the overall match result, which is still
|
case, the optimization does not affect the overall match result, which is still
|
||||||
|
@ -1194,7 +1194,7 @@ in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2unicode\fP
|
\fBpcre2unicode\fP
|
||||||
.\"
|
.\"
|
||||||
document.
|
document.
|
||||||
If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative
|
If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative
|
||||||
error code.
|
error code.
|
||||||
.P
|
.P
|
||||||
|
@ -1385,9 +1385,9 @@ The possible values for the second argument are defined in \fBpcre2.h\fP, and
|
||||||
are as follows:
|
are as follows:
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_ALLOPTIONS
|
PCRE2_INFO_ALLOPTIONS
|
||||||
PCRE2_INFO_ARGOPTIONS
|
PCRE2_INFO_ARGOPTIONS
|
||||||
.sp
|
.sp
|
||||||
Return a copy of the pattern's options. The third argument should point to a
|
Return a copy of the pattern's options. The third argument should point to a
|
||||||
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
|
||||||
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
|
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
|
||||||
the compile options as modified by any top-level option settings at the start
|
the compile options as modified by any top-level option settings at the start
|
||||||
|
@ -1406,7 +1406,7 @@ alternatives begin with one of the following:
|
||||||
.* if PCRE2_DOTALL is set and there are no back
|
.* if PCRE2_DOTALL is set and there are no back
|
||||||
references to the subpattern in which .* appears
|
references to the subpattern in which .* appears
|
||||||
.sp
|
.sp
|
||||||
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
|
||||||
PCRE2_INFO_ALLOPTIONS.
|
PCRE2_INFO_ALLOPTIONS.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_BACKREFMAX
|
PCRE2_INFO_BACKREFMAX
|
||||||
|
@ -1490,7 +1490,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
|
||||||
.sp
|
.sp
|
||||||
Returns 1 if there is a rightmost literal code unit that must exist in any
|
Returns 1 if there is a rightmost literal code unit that must exist in any
|
||||||
matched string, other than at its start. The third argument should point to an
|
matched string, other than at its start. The third argument should point to an
|
||||||
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
|
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
|
||||||
returned, the code unit value itself can be retrieved using
|
returned, the code unit value itself can be retrieved using
|
||||||
PCRE2_INFO_LASTCODEUNIT.
|
PCRE2_INFO_LASTCODEUNIT.
|
||||||
.P
|
.P
|
||||||
|
@ -1617,7 +1617,7 @@ values are:
|
||||||
3 Carriage return, linefeed (CRLF)
|
3 Carriage return, linefeed (CRLF)
|
||||||
4 Any Unicode line ending
|
4 Any Unicode line ending
|
||||||
5 Any of CR, LF, or CRLF
|
5 Any of CR, LF, or CRLF
|
||||||
.sp
|
.sp
|
||||||
The default can be overridden when a pattern is matched.
|
The default can be overridden when a pattern is matched.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_RECURSIONLIMIT
|
PCRE2_INFO_RECURSIONLIMIT
|
||||||
|
@ -1652,11 +1652,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
||||||
.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP);
|
.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP);
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
Information about successful and unsuccessful matches is placed in a match
|
Information about successful and unsuccessful matches is placed in a match
|
||||||
data block, which is an opaque structure that is accessed by function calls. In
|
data block, which is an opaque structure that is accessed by function calls. In
|
||||||
particular, the match data block contains a vector of offsets into the subject
|
particular, the match data block contains a vector of offsets into the subject
|
||||||
string that define the matched part of the subject and any substrings that were
|
string that define the matched part of the subject and any substrings that were
|
||||||
captured. This is known as the \fIovector\fP.
|
captured. This is known as the \fIovector\fP.
|
||||||
.P
|
.P
|
||||||
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
|
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
|
||||||
match data block by calling one of the creation functions above. For
|
match data block by calling one of the creation functions above. For
|
||||||
|
@ -1669,11 +1669,11 @@ pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to
|
||||||
return the overall matched string.
|
return the overall matched string.
|
||||||
.P
|
.P
|
||||||
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
|
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
|
||||||
pointer to a compiled pattern. In this case the ovector is created to be
|
pointer to a compiled pattern. In this case the ovector is created to be
|
||||||
exactly the right size to hold all the substrings a pattern might capture.
|
exactly the right size to hold all the substrings a pattern might capture.
|
||||||
.P
|
.P
|
||||||
The second argument of both these functions is a pointer to a general context,
|
The second argument of both these functions is a pointer to a general context,
|
||||||
which can specify custom memory management for obtaining the memory for the
|
which can specify custom memory management for obtaining the memory for the
|
||||||
match data block. If you are not using custom memory management, pass NULL.
|
match data block. If you are not using custom memory management, pass NULL.
|
||||||
.P
|
.P
|
||||||
A match data block can be used many times, with the same or different compiled
|
A match data block can be used many times, with the same or different compiled
|
||||||
|
@ -1729,8 +1729,8 @@ Here is an example of a simple call to \fBpcre2_match()\fP:
|
||||||
match_data, /* the match data block */
|
match_data, /* the match data block */
|
||||||
NULL); /* a match context; NULL means use defaults */
|
NULL); /* a match context; NULL means use defaults */
|
||||||
.sp
|
.sp
|
||||||
If the subject string is zero-terminated, the length can be given as
|
If the subject string is zero-terminated, the length can be given as
|
||||||
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
|
||||||
matching parameters are to be changed. For details, see the section on
|
matching parameters are to be changed. For details, see the section on
|
||||||
.\" HTML <a href="#matchcontext">
|
.\" HTML <a href="#matchcontext">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
|
@ -1746,7 +1746,7 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
|
||||||
\fIsubject\fP, a length in \fIlength\fP, and a starting offset in
|
\fIsubject\fP, a length in \fIlength\fP, and a starting offset in
|
||||||
\fIstartoffset\fP. The length and offset are in code units, not characters.
|
\fIstartoffset\fP. The length and offset are in code units, not characters.
|
||||||
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
That is, they are in bytes for the 8-bit library, 16-bit code units for the
|
||||||
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
16-bit library, and 32-bit code units for the 32-bit library, whether or not
|
||||||
UTF processing is enabled.
|
UTF processing is enabled.
|
||||||
.P
|
.P
|
||||||
If \fIstartoffset\fP is greater than the length of the subject,
|
If \fIstartoffset\fP is greater than the length of the subject,
|
||||||
|
@ -1755,7 +1755,7 @@ zero, the search for a match starts at the beginning of the subject, and this
|
||||||
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
|
||||||
must point to the start of a character, or to the end of the subject (in UTF-32
|
must point to the start of a character, or to the end of the subject (in UTF-32
|
||||||
mode, one code unit equals one character, so all offsets are valid). Like the
|
mode, one code unit equals one character, so all offsets are valid). Like the
|
||||||
pattern string, the subject may contain binary zeroes.
|
pattern string, the subject may contain binary zeroes.
|
||||||
.P
|
.P
|
||||||
A non-zero starting offset is useful when searching for another match in the
|
A non-zero starting offset is useful when searching for another match in the
|
||||||
same subject by calling \fBpcre2_match()\fP again after a previous success.
|
same subject by calling \fBpcre2_match()\fP again after a previous success.
|
||||||
|
@ -1816,7 +1816,7 @@ JIT matching is disabled and the normal interpretive code in
|
||||||
The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first
|
The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first
|
||||||
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
|
||||||
to be anchored by virtue of its contents, it cannot be made unanchored at
|
to be anchored by virtue of its contents, it cannot be made unanchored at
|
||||||
matching time. Note that setting the option at match time disables JIT
|
matching time. Note that setting the option at match time disables JIT
|
||||||
matching.
|
matching.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_NOTBOL
|
PCRE2_NOTBOL
|
||||||
|
@ -1880,13 +1880,13 @@ in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2unicode\fP
|
\fBpcre2unicode\fP
|
||||||
.\"
|
.\"
|
||||||
page.
|
page.
|
||||||
.P
|
.P
|
||||||
If you know that your subject is valid, and you want to skip these checks for
|
If you know that your subject is valid, and you want to skip these checks for
|
||||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
|
||||||
\fBpcre2_match()\fP. You might want to do this for the second and subsequent
|
\fBpcre2_match()\fP. You might want to do this for the second and subsequent
|
||||||
calls to \fBpcre2_match()\fP if you are making repeated calls to find all the
|
calls to \fBpcre2_match()\fP if you are making repeated calls to find all the
|
||||||
matches in a single subject string.
|
matches in a single subject string.
|
||||||
.P
|
.P
|
||||||
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
|
||||||
as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your
|
as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your
|
||||||
|
@ -1921,10 +1921,10 @@ documentation.
|
||||||
.
|
.
|
||||||
.SH "NEWLINE HANDLING WHEN MATCHING"
|
.SH "NEWLINE HANDLING WHEN MATCHING"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||||
standard convention for the operating system. The default can be overridden in
|
standard convention for the operating system. The default can be overridden in
|
||||||
either a
|
either a
|
||||||
.\" HTML <a href="#compilecontext">
|
.\" HTML <a href="#compilecontext">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
compile context
|
compile context
|
||||||
|
@ -1972,7 +1972,7 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern.
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
In general, a pattern matches a certain portion of the subject, and in
|
In general, a pattern matches a certain portion of the subject, and in
|
||||||
addition, further substrings from the subject may be picked out by
|
addition, further substrings from the subject may be picked out by
|
||||||
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
|
||||||
book, this is called "capturing" in what follows, and the phrase "capturing
|
book, this is called "capturing" in what follows, and the phrase "capturing
|
||||||
subpattern" is used for a fragment of a pattern that picks out a substring.
|
subpattern" is used for a fragment of a pattern that picks out a substring.
|
||||||
|
@ -1982,14 +1982,14 @@ used to find out how many capturing subpatterns there are in a compiled
|
||||||
pattern.
|
pattern.
|
||||||
.P
|
.P
|
||||||
The overall matched string and any captured substrings are returned to the
|
The overall matched string and any captured substrings are returned to the
|
||||||
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
|
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
|
||||||
contained within the
|
contained within the
|
||||||
.\" HTML <a href="#matchdatablock">
|
.\" HTML <a href="#matchdatablock">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
match data block.
|
match data block.
|
||||||
.\"
|
.\"
|
||||||
You can obtain direct access to the ovector by calling
|
You can obtain direct access to the ovector by calling
|
||||||
\fBpcre2_get_ovector_pointer()\fP to find its address, and
|
\fBpcre2_get_ovector_pointer()\fP to find its address, and
|
||||||
\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it
|
\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it
|
||||||
contains. Alternatively, you can use the auxiliary functions for accessing
|
contains. Alternatively, you can use the auxiliary functions for accessing
|
||||||
captured substrings
|
captured substrings
|
||||||
|
@ -2065,17 +2065,17 @@ had.
|
||||||
.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
|
.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
|
||||||
.fi
|
.fi
|
||||||
.P
|
.P
|
||||||
In addition to the offsets in the ovector, other information about a match is
|
In addition to the offsets in the ovector, other information about a match is
|
||||||
retained in the match data block and can be retrieved by the above functions.
|
retained in the match data block and can be retrieved by the above functions.
|
||||||
.P
|
.P
|
||||||
When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a
|
When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a
|
||||||
pointer to the zero-terminated name, which is within the compiled pattern.
|
pointer to the zero-terminated name, which is within the compiled pattern.
|
||||||
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
Otherwise NULL is returned. A (*MARK) name may be available after a failed
|
||||||
match or a partial match, as well as after a successful one.
|
match or a partial match, as well as after a successful one.
|
||||||
.P
|
.P
|
||||||
The offset of the character at which the successful match started is
|
The offset of the character at which the successful match started is
|
||||||
returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
|
returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
|
||||||
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
|
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
|
||||||
however, the \eK has no effect for a partial match.
|
however, the \eK has no effect for a partial match.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -2083,7 +2083,7 @@ however, the \eK has no effect for a partial match.
|
||||||
.SS "Error return values from \fBpcre2_match()\fP"
|
.SS "Error return values from \fBpcre2_match()\fP"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
|
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
|
||||||
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
|
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
|
||||||
error codes are also returned by other functions, and are documented with them.
|
error codes are also returned by other functions, and are documented with them.
|
||||||
The codes are given names in the header file. If UTF checking is in force and
|
The codes are given names in the header file. If UTF checking is in force and
|
||||||
|
@ -2237,7 +2237,7 @@ extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first
|
||||||
argument is a pointer to the match data block, the second is the group number,
|
argument is a pointer to the match data block, the second is the group number,
|
||||||
and the third is a pointer to a variable into which the length is placed.
|
and the third is a pointer to a variable into which the length is placed.
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
|
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
|
||||||
supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into
|
supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into
|
||||||
new memory, obtained using the same memory allocation function that was used
|
new memory, obtained using the same memory allocation function that was used
|
||||||
for the match data block. The first two arguments of these functions are a
|
for the match data block. The first two arguments of these functions are a
|
||||||
|
@ -2250,10 +2250,10 @@ the buffer and a pointer to a variable that contains its length in code units.
|
||||||
This is updated to contain the actual number of code units used, excluding the
|
This is updated to contain the actual number of code units used, excluding the
|
||||||
terminating zero.
|
terminating zero.
|
||||||
.P
|
.P
|
||||||
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
|
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
|
||||||
to variables that are updated with a pointer to the new memory and the number
|
to variables that are updated with a pointer to the new memory and the number
|
||||||
of code units that comprise the substring, again excluding the terminating
|
of code units that comprise the substring, again excluding the terminating
|
||||||
zero. When the substring is no longer needed, the memory should be freed by
|
zero. When the substring is no longer needed, the memory should be freed by
|
||||||
calling \fBpcre2_substring_free()\fP.
|
calling \fBpcre2_substring_free()\fP.
|
||||||
.P
|
.P
|
||||||
The return value from these functions is zero for success, or one of these
|
The return value from these functions is zero for success, or one of these
|
||||||
|
@ -2266,9 +2266,9 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ERROR_NOSUBSTRING
|
PCRE2_ERROR_NOSUBSTRING
|
||||||
.sp
|
.sp
|
||||||
No substring with the given number was captured. This could be because there is
|
No substring with the given number was captured. This could be because there is
|
||||||
no capturing group of that number in the pattern, or because the group with
|
no capturing group of that number in the pattern, or because the group with
|
||||||
that number did not participate in the match, or because the ovector was too
|
that number did not participate in the match, or because the ovector was too
|
||||||
small to capture that group.
|
small to capture that group.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -2284,7 +2284,7 @@ small to capture that group.
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_substring_list_get()\fP function extracts all available substrings
|
The \fBpcre2_substring_list_get()\fP function extracts all available substrings
|
||||||
and builds a list of pointers to them, and a second list that contains their
|
and builds a list of pointers to them, and a second list that contains their
|
||||||
lengths (in code units), excluding a terminating zero that is added to each of
|
lengths (in code units), excluding a terminating zero that is added to each of
|
||||||
them. All this is done in a single block of memory that is obtained using the
|
them. All this is done in a single block of memory that is obtained using the
|
||||||
same memory allocation function that was used to get the match data block.
|
same memory allocation function that was used to get the match data block.
|
||||||
.P
|
.P
|
||||||
|
@ -2295,7 +2295,7 @@ NULL pointer. The address of the list of lengths is returned via
|
||||||
therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP
|
therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP
|
||||||
argument to disable the creation of a list of lengths. The yield of the
|
argument to disable the creation of a list of lengths. The yield of the
|
||||||
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
|
||||||
could not be obtained. When the list is no longer needed, it should be freed by
|
could not be obtained. When the list is no longer needed, it should be freed by
|
||||||
calling \fBpcre2_substring_list_free()\fP.
|
calling \fBpcre2_substring_list_free()\fP.
|
||||||
.P
|
.P
|
||||||
If this function encounters a substring that is unset, which can happen when
|
If this function encounters a substring that is unset, which can happen when
|
||||||
|
@ -2340,7 +2340,7 @@ name.
|
||||||
.P
|
.P
|
||||||
Given the number, you can extract the substring directly, or use one of the
|
Given the number, you can extract the substring directly, or use one of the
|
||||||
functions described in the previous section. For convenience, there are also
|
functions described in the previous section. For convenience, there are also
|
||||||
"byname" functions that correspond to the "bynumber" functions, the only
|
"byname" functions that correspond to the "bynumber" functions, the only
|
||||||
difference being that the second argument is a name instead of a number.
|
difference being that the second argument is a name instead of a number.
|
||||||
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
However, if PCRE2_DUPNAMES is set and there are duplicate names,
|
||||||
the behaviour may not be what you want (see the next section).
|
the behaviour may not be what you want (see the next section).
|
||||||
|
@ -2413,7 +2413,7 @@ numbers, and hence the captured data.
|
||||||
.sp
|
.sp
|
||||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||||
when it finds the first match, starting at a given point in the subject. If you
|
when it finds the first match, starting at a given point in the subject. If you
|
||||||
want to find all possible matches, or the longest possible match at a given
|
want to find all possible matches, or the longest possible match at a given
|
||||||
position, consider using the alternative matching function (see below) instead.
|
position, consider using the alternative matching function (see below) instead.
|
||||||
If you cannot use the alternative function, you can kludge it up by making use
|
If you cannot use the alternative function, you can kludge it up by making use
|
||||||
of the callout facility, which is described in the
|
of the callout facility, which is described in the
|
||||||
|
@ -2614,8 +2614,8 @@ fail, this error is given.
|
||||||
.SH "SEE ALSO"
|
.SH "SEE ALSO"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
|
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
|
||||||
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
|
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
|
||||||
\fBpcre2demo\fP(3), \fBpcre2sample\fP(3), \fBpcre2stack\fP(3).
|
\fBpcre2demo\fP(3), \fBpcre2sample\fP(3), \fBpcre2stack\fP(3).
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -71,11 +71,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
|
||||||
libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
|
libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
|
||||||
strings that are contained in vectors of 16-bit and 32-bit code units,
|
strings that are contained in vectors of 16-bit and 32-bit code units,
|
||||||
respectively. These can be interpreted either as single-unit characters or
|
respectively. These can be interpreted either as single-unit characters or
|
||||||
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
|
||||||
the following to the \fBconfigure\fP command:
|
the following to the \fBconfigure\fP command:
|
||||||
.sp
|
.sp
|
||||||
--enable-pcre16
|
--enable-pcre16
|
||||||
--enable-pcre32
|
--enable-pcre32
|
||||||
.sp
|
.sp
|
||||||
If you do not want the 8-bit library, add
|
If you do not want the 8-bit library, add
|
||||||
.sp
|
.sp
|
||||||
|
@ -367,7 +367,7 @@ override this value by specifying a run-time option.
|
||||||
If you add one of
|
If you add one of
|
||||||
.sp
|
.sp
|
||||||
--enable-pcre2test-libreadline
|
--enable-pcre2test-libreadline
|
||||||
--enable-pcre2test-libedit
|
--enable-pcre2test-libedit
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
|
to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
|
||||||
\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
|
\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
|
||||||
|
@ -384,8 +384,8 @@ unmodified distribution version of readline is in use), some extra
|
||||||
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
|
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
|
||||||
this:
|
this:
|
||||||
.sp
|
.sp
|
||||||
"Readline uses the termcap functions, but does not link with
|
"Readline uses the termcap functions, but does not link with
|
||||||
the termcap or curses library itself, allowing applications
|
the termcap or curses library itself, allowing applications
|
||||||
which link with readline the to choose an appropriate library."
|
which link with readline the to choose an appropriate library."
|
||||||
.sp
|
.sp
|
||||||
If your environment has not been set up so that an appropriate library is
|
If your environment has not been set up so that an appropriate library is
|
||||||
|
|
|
@ -16,9 +16,9 @@ PCRE2 provides a feature called "callout", which is a means of temporarily
|
||||||
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
passing control to the caller of PCRE2 in the middle of pattern matching. The
|
||||||
caller of PCRE2 provides an external function by putting its entry point in
|
caller of PCRE2 provides an external function by putting its entry point in
|
||||||
a match context (see \fBpcre2_set_callout()\fP) in the
|
a match context (see \fBpcre2_set_callout()\fP) in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2api\fP
|
\fBpcre2api\fP
|
||||||
.\"
|
.\"
|
||||||
documentation).
|
documentation).
|
||||||
.P
|
.P
|
||||||
Within a regular expression, (?C) indicates the points at which the external
|
Within a regular expression, (?C) indicates the points at which the external
|
||||||
|
|
|
@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||||
incompatible with the original PCRE API.
|
incompatible with the original PCRE API.
|
||||||
|
|
||||||
There are actually three libraries, each supporting a different code unit
|
There are actually three libraries, each supporting a different code unit
|
||||||
width. This demonstration program uses the 8-bit library.
|
width. This demonstration program uses the 8-bit library.
|
||||||
|
|
||||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||||
|
@ -56,8 +56,8 @@ the following line. */
|
||||||
|
|
||||||
/* #define PCRE2_STATIC */
|
/* #define PCRE2_STATIC */
|
||||||
|
|
||||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||||
only one code unit width, it makes it possible to use generic function names
|
only one code unit width, it makes it possible to use generic function names
|
||||||
such as pcre2_compile(). */
|
such as pcre2_compile(). */
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
|
||||||
|
|
||||||
re = pcre2_compile(
|
re = pcre2_compile(
|
||||||
pattern, /* the pattern */
|
pattern, /* the pattern */
|
||||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||||
0, /* default options */
|
0, /* default options */
|
||||||
&errornumber, /* for error number */
|
&errornumber, /* for error number */
|
||||||
&erroroffset, /* for error offset */
|
&erroroffset, /* for error offset */
|
||||||
|
@ -151,9 +151,9 @@ re = pcre2_compile(
|
||||||
|
|
||||||
if (re == NULL)
|
if (re == NULL)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR buffer[256];
|
PCRE2_UCHAR buffer[256];
|
||||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||||
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
|
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
|
||||||
buffer);
|
buffer);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -197,7 +197,7 @@ if (rc < 0)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||||
stored. */
|
stored. */
|
||||||
|
|
||||||
ovector = pcre2_get_ovector_pointer(match_data);
|
ovector = pcre2_get_ovector_pointer(match_data);
|
||||||
|
@ -210,7 +210,7 @@ printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
|
||||||
* captured. *
|
* captured. *
|
||||||
*************************************************************************/
|
*************************************************************************/
|
||||||
|
|
||||||
/* The output vector wasn't big enough. This should not happen, because we used
|
/* The output vector wasn't big enough. This should not happen, because we used
|
||||||
pcre2_match_data_create_from_pattern() above. */
|
pcre2_match_data_create_from_pattern() above. */
|
||||||
|
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
|
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
|
||||||
&name_entry_size); /* where to put the answer */
|
&name_entry_size); /* where to put the answer */
|
||||||
|
|
||||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||||
and the substring itself. In the 8-bit library the number is held in two
|
and the substring itself. In the 8-bit library the number is held in two
|
||||||
bytes, most significant first. */
|
bytes, most significant first. */
|
||||||
|
|
||||||
tabptr = name_table;
|
tabptr = name_table;
|
||||||
|
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
|
||||||
|
|
||||||
if (!find_all) /* Check for -g */
|
if (!find_all) /* Check for -g */
|
||||||
{
|
{
|
||||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||||
return 0; /* Exit the program. */
|
return 0; /* Exit the program. */
|
||||||
}
|
}
|
||||||
|
@ -324,7 +324,7 @@ sequence. */
|
||||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||||
newline == PCRE2_NEWLINE_CRLF ||
|
newline == PCRE2_NEWLINE_CRLF ||
|
||||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
/* Loop for second and subsequent matches */
|
/* Loop for second and subsequent matches */
|
||||||
|
|
||||||
|
|
|
@ -48,10 +48,10 @@ performance, there is also a "fast path" API that is JIT-specific.
|
||||||
.SH "SIMPLE USE OF JIT"
|
.SH "SIMPLE USE OF JIT"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
To make use of the JIT support in the simplest way, all you have to do is to
|
To make use of the JIT support in the simplest way, all you have to do is to
|
||||||
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
|
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
|
||||||
\fBpcre2_compile()\fP. This function has two arguments: the first is the
|
\fBpcre2_compile()\fP. This function has two arguments: the first is the
|
||||||
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
|
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
|
||||||
second is a set of option bits, which must include at least one of
|
second is a set of option bits, which must include at least one of
|
||||||
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
|
||||||
.P
|
.P
|
||||||
|
@ -221,7 +221,7 @@ non-default JIT stacks might operate:
|
||||||
.sp
|
.sp
|
||||||
All the functions described in this section do nothing if JIT is not available,
|
All the functions described in this section do nothing if JIT is not available,
|
||||||
and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument
|
and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument
|
||||||
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
|
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
|
||||||
processed by \fBpcre2_jit_compile()\fP.
|
processed by \fBpcre2_jit_compile()\fP.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -302,18 +302,18 @@ callback.
|
||||||
.sp
|
.sp
|
||||||
int rc;
|
int rc;
|
||||||
pcre2_code *re;
|
pcre2_code *re;
|
||||||
pcre2_match_data *match_data;
|
pcre2_match_data *match_data;
|
||||||
pcre2_jit_stack *jit_stack;
|
pcre2_jit_stack *jit_stack;
|
||||||
.sp
|
.sp
|
||||||
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
|
||||||
&errornumber, &erroffset, NULL);
|
&errornumber, &erroffset, NULL);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
|
||||||
/* Check for errors */
|
/* Check for errors */
|
||||||
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
|
||||||
/* Check for error (NULL) */
|
/* Check for error (NULL) */
|
||||||
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
pcre2_jit_stack_assign(re, NULL, jit_stack);
|
||||||
match_data = pcre2_match_data_create(re, 10);
|
match_data = pcre2_match_data_create(re, 10);
|
||||||
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
|
||||||
/* Check results */
|
/* Check results */
|
||||||
pcre2_code_free(re);
|
pcre2_code_free(re);
|
||||||
|
|
|
@ -64,15 +64,15 @@ matched; without such a restriction there would always be a partial match of an
|
||||||
empty string at the end of the subject.
|
empty string at the end of the subject.
|
||||||
.P
|
.P
|
||||||
When a partial match is returned, the first two elements in the ovector point
|
When a partial match is returned, the first two elements in the ovector point
|
||||||
to the portion of the subject that was matched. The appearance of \eK in the
|
to the portion of the subject that was matched. The appearance of \eK in the
|
||||||
pattern has no effect for a partial match. Consider this pattern:
|
pattern has no effect for a partial match. Consider this pattern:
|
||||||
.sp
|
.sp
|
||||||
/abc\eK123/
|
/abc\eK123/
|
||||||
.sp
|
.sp
|
||||||
If it is matched against "456abc123xyz" the result is a complete match, and the
|
If it is matched against "456abc123xyz" the result is a complete match, and the
|
||||||
ovector defines the matched string as "123", because \eK resets the "start of
|
ovector defines the matched string as "123", because \eK resets the "start of
|
||||||
match" point. However, if a partial match is requested and the subject string
|
match" point. However, if a partial match is requested and the subject string
|
||||||
is "456abc12", a partial match is found for the string "abc12", because all
|
is "456abc12", a partial match is found for the string "abc12", because all
|
||||||
these characters are needed for a subsequent re-match with additional
|
these characters are needed for a subsequent re-match with additional
|
||||||
characters.
|
characters.
|
||||||
.P
|
.P
|
||||||
|
@ -316,14 +316,14 @@ same point as before.
|
||||||
For example, if the pattern "(?<=123)abc" is partially matched against the
|
For example, if the pattern "(?<=123)abc" is partially matched against the
|
||||||
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
|
||||||
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
lookbehind count is 3, so all characters before offset 2 can be discarded. The
|
||||||
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
|
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
|
||||||
displays a partial match, it indicates the lookbehind characters with '<'
|
displays a partial match, it indicates the lookbehind characters with '<'
|
||||||
characters:
|
characters:
|
||||||
.sp
|
.sp
|
||||||
re> "(?<=123)abc"
|
re> "(?<=123)abc"
|
||||||
data> xx123ab\e=ph
|
data> xx123ab\e=ph
|
||||||
Partial match: 123ab
|
Partial match: 123ab
|
||||||
<<<
|
<<<
|
||||||
.P
|
.P
|
||||||
3. Because a partial match must always contain at least one character, what
|
3. Because a partial match must always contain at least one character, what
|
||||||
might be considered a partial match of an empty string actually gives a "no
|
might be considered a partial match of an empty string actually gives a "no
|
||||||
|
|
|
@ -118,7 +118,7 @@ page has
|
||||||
.\" </a>
|
.\" </a>
|
||||||
further discussion
|
further discussion
|
||||||
.\"
|
.\"
|
||||||
about newlines, and shows how to set the newline convention when calling
|
about newlines, and shows how to set the newline convention when calling
|
||||||
\fBpcre2_compile()\fP.
|
\fBpcre2_compile()\fP.
|
||||||
.P
|
.P
|
||||||
It is also possible to specify a newline convention by starting a pattern
|
It is also possible to specify a newline convention by starting a pattern
|
||||||
|
@ -196,7 +196,7 @@ corresponding characters in the subject. As a trivial example, the pattern
|
||||||
.sp
|
.sp
|
||||||
matches a portion of a subject string that is identical to itself. When
|
matches a portion of a subject string that is identical to itself. When
|
||||||
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
|
||||||
independently of case.
|
independently of case.
|
||||||
.P
|
.P
|
||||||
The power of regular expressions comes from the ability to include alternatives
|
The power of regular expressions comes from the ability to include alternatives
|
||||||
and repetitions in the pattern. These are encoded in the pattern by the use of
|
and repetitions in the pattern. These are encoded in the pattern by the use of
|
||||||
|
@ -1199,8 +1199,8 @@ An opening square bracket introduces a character class, terminated by a closing
|
||||||
square bracket. A closing square bracket on its own is not special by default.
|
square bracket. A closing square bracket on its own is not special by default.
|
||||||
If a closing square bracket is required as a member of the class, it should be
|
If a closing square bracket is required as a member of the class, it should be
|
||||||
the first data character in the class (after an initial circumflex, if present)
|
the first data character in the class (after an initial circumflex, if present)
|
||||||
or escaped with a backslash. This means that, by default, an empty class cannot
|
or escaped with a backslash. This means that, by default, an empty class cannot
|
||||||
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
|
||||||
square bracket at the start does end the (empty) class.
|
square bracket at the start does end the (empty) class.
|
||||||
.P
|
.P
|
||||||
A character class matches a single character in the subject. A matched
|
A character class matches a single character in the subject. A matched
|
||||||
|
@ -1221,7 +1221,7 @@ string.
|
||||||
When caseless matching is set, any letters in a class represent both their
|
When caseless matching is set, any letters in a class represent both their
|
||||||
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
upper case and lower case versions, so for example, a caseless [aeiou] matches
|
||||||
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
|
||||||
caseful version would.
|
caseful version would.
|
||||||
.P
|
.P
|
||||||
Characters that might indicate line breaks are never treated in any special way
|
Characters that might indicate line breaks are never treated in any special way
|
||||||
when matching character classes, whatever line-ending sequence is in use, and
|
when matching character classes, whatever line-ending sequence is in use, and
|
||||||
|
@ -1340,7 +1340,7 @@ classes by other sequences, as follows:
|
||||||
[:alnum:] becomes \ep{Xan}
|
[:alnum:] becomes \ep{Xan}
|
||||||
[:alpha:] becomes \ep{L}
|
[:alpha:] becomes \ep{L}
|
||||||
[:blank:] becomes \eh
|
[:blank:] becomes \eh
|
||||||
[:cntrl:] becomes \ep{Cc}
|
[:cntrl:] becomes \ep{Cc}
|
||||||
[:digit:] becomes \ep{Nd}
|
[:digit:] becomes \ep{Nd}
|
||||||
[:lower:] becomes \ep{Ll}
|
[:lower:] becomes \ep{Ll}
|
||||||
[:space:] becomes \ep{Xps}
|
[:space:] becomes \ep{Xps}
|
||||||
|
@ -1496,7 +1496,7 @@ match "cataract", "erpillar" or an empty string.
|
||||||
.sp
|
.sp
|
||||||
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
2. It sets up the subpattern as a capturing subpattern. This means that, when
|
||||||
the whole pattern matches, the portion of the subject string that matched the
|
the whole pattern matches, the portion of the subject string that matched the
|
||||||
subpattern is passed back to the caller, separately from the portion that
|
subpattern is passed back to the caller, separately from the portion that
|
||||||
matched the whole pattern. (This applies only to the traditional matching
|
matched the whole pattern. (This applies only to the traditional matching
|
||||||
function; the DFA matching function does not support capturing.)
|
function; the DFA matching function does not support capturing.)
|
||||||
.P
|
.P
|
||||||
|
@ -1916,7 +1916,7 @@ at release 5.10.
|
||||||
PCRE2 has an optimization that automatically "possessifies" certain simple
|
PCRE2 has an optimization that automatically "possessifies" certain simple
|
||||||
pattern constructs. For example, the sequence A+B is treated as A++B because
|
pattern constructs. For example, the sequence A+B is treated as A++B because
|
||||||
there is no point in backtracking into a sequence of A's when B must follow.
|
there is no point in backtracking into a sequence of A's when B must follow.
|
||||||
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
|
||||||
the pattern with (*NO_AUTO_POSSESS).
|
the pattern with (*NO_AUTO_POSSESS).
|
||||||
.P
|
.P
|
||||||
When a pattern contains an unlimited repeat inside a subpattern that can itself
|
When a pattern contains an unlimited repeat inside a subpattern that can itself
|
||||||
|
@ -2238,7 +2238,7 @@ if the pattern is written as
|
||||||
.sp
|
.sp
|
||||||
^.*+(?<=abcd)
|
^.*+(?<=abcd)
|
||||||
.sp
|
.sp
|
||||||
there can be no backtracking for the .*+ item because of the possessive
|
there can be no backtracking for the .*+ item because of the possessive
|
||||||
quantifier; it can match only the entire string. The subsequent lookbehind
|
quantifier; it can match only the entire string. The subsequent lookbehind
|
||||||
assertion does a single test on the last four characters. If it fails, the
|
assertion does a single test on the last four characters. If it fails, the
|
||||||
match fails immediately. For long strings, this approach makes a significant
|
match fails immediately. For long strings, this approach makes a significant
|
||||||
|
@ -2754,8 +2754,8 @@ same pair of parentheses when there is a repetition.
|
||||||
.P
|
.P
|
||||||
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
|
||||||
code. The feature is called "callout". The caller of PCRE2 provides an external
|
code. The feature is called "callout". The caller of PCRE2 provides an external
|
||||||
function by putting its entry point in a match context using the function
|
function by putting its entry point in a match context using the function
|
||||||
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
|
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
|
||||||
\fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry
|
\fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry
|
||||||
point is set to NULL, callouts are disabled.
|
point is set to NULL, callouts are disabled.
|
||||||
.P
|
.P
|
||||||
|
@ -3008,7 +3008,7 @@ output from \fBpcre2test\fP:
|
||||||
re> /(*COMMIT)abc/
|
re> /(*COMMIT)abc/
|
||||||
data> xyzabc
|
data> xyzabc
|
||||||
0: abc
|
0: abc
|
||||||
data>
|
data>
|
||||||
re> /(*COMMIT)abc/no_start_optimize
|
re> /(*COMMIT)abc/no_start_optimize
|
||||||
data> xyzabc
|
data> xyzabc
|
||||||
No match
|
No match
|
||||||
|
@ -3035,7 +3035,7 @@ as (*COMMIT).
|
||||||
.P
|
.P
|
||||||
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
|
||||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||||
ignoring those set by (*PRUNE) or (*THEN).
|
ignoring those set by (*PRUNE) or (*THEN).
|
||||||
.sp
|
.sp
|
||||||
(*SKIP)
|
(*SKIP)
|
||||||
|
@ -3085,7 +3085,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
|
||||||
.P
|
.P
|
||||||
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
|
||||||
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
It is like (*MARK:NAME) in that the name is remembered for passing back to the
|
||||||
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
|
||||||
ignoring those set by (*PRUNE) and (*THEN).
|
ignoring those set by (*PRUNE) and (*THEN).
|
||||||
.P
|
.P
|
||||||
A subpattern that does not contain a | character is just a part of the
|
A subpattern that does not contain a | character is just a part of the
|
||||||
|
|
|
@ -90,10 +90,10 @@ of how to do this are given in the
|
||||||
\fBpcre2build\fP
|
\fBpcre2build\fP
|
||||||
.\"
|
.\"
|
||||||
documentation. When built in this way, instead of using the stack, PCRE2
|
documentation. When built in this way, instead of using the stack, PCRE2
|
||||||
gets memory for remembering backup points from the heap. By default, the memory
|
gets memory for remembering backup points from the heap. By default, the memory
|
||||||
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
|
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
|
||||||
to supply your own memory management function. For details, see the section
|
to supply your own memory management function. For details, see the section
|
||||||
entitled
|
entitled
|
||||||
.\" HTML <a href="pcre2api.html#matchcontext">
|
.\" HTML <a href="pcre2api.html#matchcontext">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
"The match context"
|
"The match context"
|
||||||
|
@ -104,8 +104,8 @@ in the
|
||||||
.\"
|
.\"
|
||||||
documentation. Since the block sizes are always the same, it may be possible to
|
documentation. Since the block sizes are always the same, it may be possible to
|
||||||
implement a customized memory handler that is more efficient than the standard
|
implement a customized memory handler that is more efficient than the standard
|
||||||
function. The memory blocks obtained for this purpose are retained and re-used
|
function. The memory blocks obtained for this purpose are retained and re-used
|
||||||
if possible while \fBpcre2_match()\fP is running. They are all freed just
|
if possible while \fBpcre2_match()\fP is running. They are all freed just
|
||||||
before it exits.
|
before it exits.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -387,7 +387,7 @@ appear.
|
||||||
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
|
||||||
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
|
||||||
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
|
||||||
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
|
||||||
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
|
||||||
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
|
||||||
(*UTF) set appropriate UTF mode for the library in use
|
(*UTF) set appropriate UTF mode for the library in use
|
||||||
|
|
|
@ -433,7 +433,7 @@ about the pattern:
|
||||||
/I info show info about compiled pattern
|
/I info show info about compiled pattern
|
||||||
hex pattern is coded in hexadecimal
|
hex pattern is coded in hexadecimal
|
||||||
jit[=<number>] use JIT
|
jit[=<number>] use JIT
|
||||||
jitverify verify JIT use
|
jitverify verify JIT use
|
||||||
locale=<name> use this locale
|
locale=<name> use this locale
|
||||||
memory show memory used
|
memory show memory used
|
||||||
newline=<type> set newline type
|
newline=<type> set newline type
|
||||||
|
@ -518,7 +518,7 @@ number in the range 0 to 7:
|
||||||
7 all three modes
|
7 all three modes
|
||||||
.sp
|
.sp
|
||||||
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
If no number is given, 7 is assumed. If JIT compilation is successful, the
|
||||||
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
|
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
|
||||||
for the appropriate type of match, except when incompatible run-time options
|
for the appropriate type of match, except when incompatible run-time options
|
||||||
are specified. For more details, see the
|
are specified. For more details, see the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
@ -670,7 +670,7 @@ for a description of their effects.
|
||||||
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
partial_hard (or ph) set PCRE2_PARTIAL_HARD
|
||||||
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
|
||||||
.sp
|
.sp
|
||||||
The partial matching modifiers are provided with abbreviations because they
|
The partial matching modifiers are provided with abbreviations because they
|
||||||
appear frequently in tests.
|
appear frequently in tests.
|
||||||
.P
|
.P
|
||||||
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
|
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
|
||||||
|
@ -844,8 +844,8 @@ context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_recursion_limit()\fP
|
||||||
until it finds the minimum values for each parameter that allow
|
until it finds the minimum values for each parameter that allow
|
||||||
\fBpcre2_match()\fP to complete without error.
|
\fBpcre2_match()\fP to complete without error.
|
||||||
.P
|
.P
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||||
being used, neither limit is relevant, and this modifier is ignored (with a
|
being used, neither limit is relevant, and this modifier is ignored (with a
|
||||||
warning message).
|
warning message).
|
||||||
.P
|
.P
|
||||||
The \fImatch_limit\fP number is a measure of the amount of backtracking
|
The \fImatch_limit\fP number is a measure of the amount of backtracking
|
||||||
|
@ -890,10 +890,10 @@ appears, though of course it can also be used to set a default in a
|
||||||
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
\fB#subject\fP command. It specifies the number of pairs of offsets that are
|
||||||
available for storing matching information. The default is 15.
|
available for storing matching information. The default is 15.
|
||||||
.P
|
.P
|
||||||
At least one pair of offsets is always created by
|
At least one pair of offsets is always created by
|
||||||
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
|
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
|
||||||
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
value of 0 is the same as 1. However a value of 0 is useful when testing the
|
||||||
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
|
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
|
||||||
vector.
|
vector.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -57,7 +57,7 @@ individual code units.
|
||||||
In UTF modes, the dot metacharacter matches one UTF character instead of a
|
In UTF modes, the dot metacharacter matches one UTF character instead of a
|
||||||
single code unit.
|
single code unit.
|
||||||
.P
|
.P
|
||||||
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
|
||||||
but its use can lead to some strange effects because it breaks up multi-unit
|
but its use can lead to some strange effects because it breaks up multi-unit
|
||||||
characters (see the description of \eC in the
|
characters (see the description of \eC in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
@ -107,8 +107,8 @@ case-equivalent, and these are treated as such.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
|
||||||
are (by default) checked for validity on entry to the relevant functions.
|
are (by default) checked for validity on entry to the relevant functions.
|
||||||
If an invalid UTF string is passed, an error return is given.
|
If an invalid UTF string is passed, an error return is given.
|
||||||
.P
|
.P
|
||||||
UTF-16 and UTF-32 strings can indicate their endianness by a special code known
|
UTF-16 and UTF-32 strings can indicate their endianness by a special code known
|
||||||
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting
|
||||||
|
|
12
perltest.pl
12
perltest.pl
|
@ -82,13 +82,13 @@ for (;;)
|
||||||
|
|
||||||
chomp($pattern);
|
chomp($pattern);
|
||||||
$pattern =~ s/\s+$//;
|
$pattern =~ s/\s+$//;
|
||||||
|
|
||||||
# Split the pattern from the modifiers and adjust them as necessary.
|
# Split the pattern from the modifiers and adjust them as necessary.
|
||||||
|
|
||||||
$pattern =~ /^\s*((.).*\2)(.*)$/s;
|
$pattern =~ /^\s*((.).*\2)(.*)$/s;
|
||||||
$pat = $1;
|
$pat = $1;
|
||||||
$mod = $3;
|
$mod = $3;
|
||||||
|
|
||||||
# The private "aftertext" modifier means "print $' afterwards".
|
# The private "aftertext" modifier means "print $' afterwards".
|
||||||
|
|
||||||
$showrest = ($mod =~ s/aftertext,?//);
|
$showrest = ($mod =~ s/aftertext,?//);
|
||||||
|
@ -131,9 +131,9 @@ for (;;)
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
last if ! ($_ = <$infile>);
|
last if ! ($_ = <$infile>);
|
||||||
last if $_ =~ /^\s*$/;
|
last if $_ =~ /^\s*$/;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
next NEXT_RE;
|
next NEXT_RE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/* This is a freestanding support program to generate a file containing
|
/* This is a freestanding support program to generate a file containing
|
||||||
character tables for PCRE2. The tables are built according to the current
|
character tables for PCRE2. The tables are built according to the current
|
||||||
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef HAVE_CONFIG_H
|
#ifdef HAVE_CONFIG_H
|
||||||
|
|
|
@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
-----------------------------------------------------------------------------
|
-----------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* This module contains functions that scan a compiled pattern and change
|
/* This module contains functions that scan a compiled pattern and change
|
||||||
repeats into possessive repeats where possible. */
|
repeats into possessive repeats where possible. */
|
||||||
|
|
||||||
|
|
||||||
|
@ -359,8 +359,8 @@ Returns: points to the start of the next opcode if *code is accepted
|
||||||
NULL if *code is not accepted
|
NULL if *code is not accepted
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static PCRE2_SPTR
|
static PCRE2_SPTR
|
||||||
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
|
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
|
||||||
uint32_t *list)
|
uint32_t *list)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR c = *code;
|
PCRE2_UCHAR c = *code;
|
||||||
|
@ -387,7 +387,7 @@ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||||
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
|
||||||
code += IMM2_SIZE;
|
code += IMM2_SIZE;
|
||||||
|
|
||||||
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
|
||||||
c != OP_POSPLUS);
|
c != OP_POSPLUS);
|
||||||
|
|
||||||
switch(base)
|
switch(base)
|
||||||
|
@ -595,7 +595,7 @@ for(;;)
|
||||||
Therefore infinite recursions are not possible. */
|
Therefore infinite recursions are not possible. */
|
||||||
|
|
||||||
c = *code;
|
c = *code;
|
||||||
|
|
||||||
/* Skip over callouts */
|
/* Skip over callouts */
|
||||||
|
|
||||||
if (c == OP_CALLOUT)
|
if (c == OP_CALLOUT)
|
||||||
|
@ -624,7 +624,7 @@ for(;;)
|
||||||
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
|
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
|
||||||
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
|
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
|
||||||
cannot be converted to a possessive form. */
|
cannot be converted to a possessive form. */
|
||||||
|
|
||||||
if (base_list[1] == 0) return FALSE;
|
if (base_list[1] == 0) return FALSE;
|
||||||
|
|
||||||
switch(*(code - GET(code, 1)))
|
switch(*(code - GET(code, 1)))
|
||||||
|
@ -636,7 +636,7 @@ for(;;)
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_ONCE_NC:
|
case OP_ONCE_NC:
|
||||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||||
last iterator. However, if the group was entered as a result of checking
|
last iterator. However, if the group was entered as a result of checking
|
||||||
a previous iterator, this is not possible. */
|
a previous iterator, this is not possible. */
|
||||||
|
|
||||||
return !entered_a_group;
|
return !entered_a_group;
|
||||||
|
@ -672,7 +672,7 @@ for(;;)
|
||||||
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
|
||||||
|
|
||||||
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
|
||||||
|
|
||||||
next_code += 1 + LINK_SIZE;
|
next_code += 1 + LINK_SIZE;
|
||||||
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
|
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
@ -681,14 +681,14 @@ for(;;)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for a supported opcode, and load its properties. */
|
/* Check for a supported opcode, and load its properties. */
|
||||||
|
|
||||||
code = get_chr_property_list(code, utf, cb->fcc, list);
|
code = get_chr_property_list(code, utf, cb->fcc, list);
|
||||||
if (code == NULL) return FALSE; /* Unsupported */
|
if (code == NULL) return FALSE; /* Unsupported */
|
||||||
|
|
||||||
/* If either opcode is a small character list, set pointers for comparing
|
/* If either opcode is a small character list, set pointers for comparing
|
||||||
characters from that list with another list, or with a property. */
|
characters from that list with another list, or with a property. */
|
||||||
|
|
||||||
|
@ -778,7 +778,7 @@ for(;;)
|
||||||
|
|
||||||
/* Because the bit sets are unaligned bytes, we need to perform byte
|
/* Because the bit sets are unaligned bytes, we need to perform byte
|
||||||
comparison here. */
|
comparison here. */
|
||||||
|
|
||||||
set_end = set1 + 32;
|
set_end = set1 + 32;
|
||||||
if (invert_bits)
|
if (invert_bits)
|
||||||
{
|
{
|
||||||
|
@ -922,7 +922,7 @@ for(;;)
|
||||||
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
|
||||||
|
|
||||||
if (!accepted) return FALSE;
|
if (!accepted) return FALSE;
|
||||||
|
|
||||||
if (list[1] == 0) return TRUE;
|
if (list[1] == 0) return TRUE;
|
||||||
/* Might be an empty repeat. */
|
/* Might be an empty repeat. */
|
||||||
continue;
|
continue;
|
||||||
|
@ -1093,8 +1093,8 @@ but some compilers complain about an unreachable statement. */
|
||||||
if appropriate. This function modifies the compiled opcode!
|
if appropriate. This function modifies the compiled opcode!
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
code points to start of the byte code
|
code points to start of the byte code
|
||||||
utf TRUE in UTF mode
|
utf TRUE in UTF mode
|
||||||
cb compile data block
|
cb compile data block
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
|
@ -1111,7 +1111,7 @@ uint32_t list[8];
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
c = *code;
|
c = *code;
|
||||||
|
|
||||||
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
|
||||||
{
|
{
|
||||||
c -= get_repeat_base(c) - OP_STAR;
|
c -= get_repeat_base(c) - OP_STAR;
|
||||||
|
@ -1244,7 +1244,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add in the fixed length from the table */
|
/* Add in the fixed length from the table */
|
||||||
|
|
||||||
code += PRIV(OP_lengths)[c];
|
code += PRIV(OP_lengths)[c];
|
||||||
|
|
||||||
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
|
||||||
|
|
|
@ -594,7 +594,7 @@ static pso pso_list[] = {
|
||||||
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
|
||||||
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
|
||||||
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
|
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
|
||||||
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
|
||||||
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
|
||||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||||
|
@ -675,12 +675,12 @@ static const uint8_t opcode_possessify[] = {
|
||||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
pcre2_code_free(pcre2_code *code)
|
pcre2_code_free(pcre2_code *code)
|
||||||
{
|
{
|
||||||
if (code != NULL)
|
if (code != NULL)
|
||||||
{
|
{
|
||||||
if (code->executable_jit != NULL)
|
if (code->executable_jit != NULL)
|
||||||
PRIV(jit_free)(code->executable_jit, &code->memctl);
|
PRIV(jit_free)(code->executable_jit, &code->memctl);
|
||||||
code->memctl.free(code, code->memctl.memory_data);
|
code->memctl.free(code, code->memctl.memory_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -4462,7 +4462,7 @@ for (;; ptr++)
|
||||||
syntax, so we just ignore the repeat. */
|
syntax, so we just ignore the repeat. */
|
||||||
|
|
||||||
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
|
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
|
||||||
previous[GET(previous, 1)] != OP_ALT)
|
previous[GET(previous, 1)] != OP_ALT)
|
||||||
goto END_REPEAT;
|
goto END_REPEAT;
|
||||||
|
|
||||||
/* There is no sense in actually repeating assertions. The only potential
|
/* There is no sense in actually repeating assertions. The only potential
|
||||||
|
@ -5169,64 +5169,64 @@ for (;; ptr++)
|
||||||
namelen = -1; /* => not a name; must set to avoid warning */
|
namelen = -1; /* => not a name; must set to avoid warning */
|
||||||
name = NULL; /* Always set to avoid warning */
|
name = NULL; /* Always set to avoid warning */
|
||||||
recno = 0; /* Always set to avoid warning */
|
recno = 0; /* Always set to avoid warning */
|
||||||
|
|
||||||
/* Point at character after (?( */
|
/* Point at character after (?( */
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
|
|
||||||
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
|
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
|
||||||
users of PCRE2 via an application can discover which release of PCRE2
|
users of PCRE2 via an application can discover which release of PCRE2
|
||||||
is being used. */
|
is being used. */
|
||||||
|
|
||||||
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
|
||||||
ptr[7] != CHAR_RIGHT_PARENTHESIS)
|
ptr[7] != CHAR_RIGHT_PARENTHESIS)
|
||||||
{
|
{
|
||||||
BOOL ge = FALSE;
|
BOOL ge = FALSE;
|
||||||
int major = 0;
|
int major = 0;
|
||||||
int minor = 0;
|
int minor = 0;
|
||||||
|
|
||||||
ptr += 7;
|
ptr += 7;
|
||||||
if (*ptr == CHAR_GREATER_THAN_SIGN)
|
if (*ptr == CHAR_GREATER_THAN_SIGN)
|
||||||
{
|
{
|
||||||
ge = TRUE;
|
ge = TRUE;
|
||||||
ptr++;
|
ptr++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
|
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
|
||||||
references its argument twice. */
|
references its argument twice. */
|
||||||
|
|
||||||
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
|
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR79;
|
*errorcodeptr = ERR79;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
|
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
|
||||||
if (*ptr == CHAR_DOT)
|
if (*ptr == CHAR_DOT)
|
||||||
{
|
{
|
||||||
ptr++;
|
ptr++;
|
||||||
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
if (*ptr != CHAR_RIGHT_PARENTHESIS)
|
||||||
{
|
{
|
||||||
*errorcodeptr = ERR79;
|
*errorcodeptr = ERR79;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ge)
|
if (ge)
|
||||||
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
|
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
|
||||||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
|
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
|
||||||
OP_TRUE : OP_FALSE;
|
OP_TRUE : OP_FALSE;
|
||||||
else
|
else
|
||||||
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
|
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
|
||||||
OP_TRUE : OP_FALSE;
|
OP_TRUE : OP_FALSE;
|
||||||
|
|
||||||
ptr++;
|
ptr++;
|
||||||
skipbytes = 1;
|
skipbytes = 1;
|
||||||
break; /* End of condition processing */
|
break; /* End of condition processing */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for a test for recursion in a named group. */
|
/* Check for a test for recursion in a named group. */
|
||||||
|
|
||||||
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
|
||||||
|
@ -5404,8 +5404,8 @@ for (;; ptr++)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
/* Similarly, check for the (?(DEFINE) "condition", which is always
|
||||||
false. During compilation we set OP_DEFINE to distinguish this from
|
false. During compilation we set OP_DEFINE to distinguish this from
|
||||||
other OP_FALSE conditions so that it can be checked for having only one
|
other OP_FALSE conditions so that it can be checked for having only one
|
||||||
branch, but after that the opcode is changed to OP_FALSE. */
|
branch, but after that the opcode is changed to OP_FALSE. */
|
||||||
|
|
||||||
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
|
||||||
|
@ -6133,7 +6133,7 @@ for (;; ptr++)
|
||||||
while (*tc != OP_KET);
|
while (*tc != OP_KET);
|
||||||
|
|
||||||
/* A DEFINE group is never obeyed inline (the "condition" is always
|
/* A DEFINE group is never obeyed inline (the "condition" is always
|
||||||
false). It must have only one branch. Having checked this, change the
|
false). It must have only one branch. Having checked this, change the
|
||||||
opcode to OP_FALSE. */
|
opcode to OP_FALSE. */
|
||||||
|
|
||||||
if (code[LINK_SIZE+1] == OP_DEFINE)
|
if (code[LINK_SIZE+1] == OP_DEFINE)
|
||||||
|
@ -6143,7 +6143,7 @@ for (;; ptr++)
|
||||||
*errorcodeptr = ERR54;
|
*errorcodeptr = ERR54;
|
||||||
goto FAILED;
|
goto FAILED;
|
||||||
}
|
}
|
||||||
code[LINK_SIZE+1] = OP_FALSE;
|
code[LINK_SIZE+1] = OP_FALSE;
|
||||||
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
|
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6219,7 +6219,7 @@ for (;; ptr++)
|
||||||
than one can replicate it as reqcu if necessary. If the subpattern has
|
than one can replicate it as reqcu if necessary. If the subpattern has
|
||||||
no firstcu, set "none" for the whole branch. In both cases, a zero
|
no firstcu, set "none" for the whole branch. In both cases, a zero
|
||||||
repeat forces firstcu to "none". */
|
repeat forces firstcu to "none". */
|
||||||
|
|
||||||
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
||||||
{
|
{
|
||||||
if (subfirstcuflags >= 0)
|
if (subfirstcuflags >= 0)
|
||||||
|
@ -6759,7 +6759,7 @@ for (;;)
|
||||||
reqcu = firstcu;
|
reqcu = firstcu;
|
||||||
reqcuflags = firstcuflags;
|
reqcuflags = firstcuflags;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
firstcuflags = REQ_NONE;
|
firstcuflags = REQ_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7389,12 +7389,12 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
|
||||||
|
|
||||||
/* A NULL compile context means "use a default context" */
|
/* A NULL compile context means "use a default context" */
|
||||||
|
|
||||||
if (ccontext == NULL)
|
if (ccontext == NULL)
|
||||||
ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
|
ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
|
||||||
|
|
||||||
/* A zero-terminated pattern is indicated by the special length value
|
/* A zero-terminated pattern is indicated by the special length value
|
||||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||||
to ensure that it is always possible to look one code unit beyond the end of
|
to ensure that it is always possible to look one code unit beyond the end of
|
||||||
the pattern's characters. */
|
the pattern's characters. */
|
||||||
|
|
||||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
||||||
|
@ -7481,19 +7481,19 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
|
||||||
case PSO_OPT:
|
case PSO_OPT:
|
||||||
cb.external_options |= p->value;
|
cb.external_options |= p->value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSO_FLG:
|
case PSO_FLG:
|
||||||
setflags |= p->value;
|
setflags |= p->value;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSO_NL:
|
case PSO_NL:
|
||||||
newline = p->value;
|
newline = p->value;
|
||||||
setflags |= PCRE2_NL_SET;
|
setflags |= PCRE2_NL_SET;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSO_BSR:
|
case PSO_BSR:
|
||||||
bsr = p->value;
|
bsr = p->value;
|
||||||
setflags |= PCRE2_BSR_SET;
|
setflags |= PCRE2_BSR_SET;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PSO_LIMM:
|
case PSO_LIMM:
|
||||||
|
@ -7883,8 +7883,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||||
|
|
||||||
/* If the pattern is still not anchored and we do not have a first code unit,
|
/* If the pattern is still not anchored and we do not have a first code unit,
|
||||||
see if there is one that is asserted (these are not saved during the compile
|
see if there is one that is asserted (these are not saved during the compile
|
||||||
because they can cause conflicts with actual literals that follow). This code
|
because they can cause conflicts with actual literals that follow). This code
|
||||||
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
|
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
|
||||||
create will not be used. */
|
create will not be used. */
|
||||||
|
|
||||||
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
||||||
|
@ -7930,7 +7930,7 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle the "required code unit", if one is set. In the case of an anchored
|
/* Handle the "required code unit", if one is set. In the case of an anchored
|
||||||
pattern, do this only if it follows a variable length item in the pattern.
|
pattern, do this only if it follows a variable length item in the pattern.
|
||||||
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
|
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
|
||||||
|
|
||||||
if (reqcuflags >= 0 &&
|
if (reqcuflags >= 0 &&
|
||||||
|
@ -7973,7 +7973,7 @@ while (*codestart == OP_ALT);
|
||||||
to set up information such as a bitmap of starting code units and a minimum
|
to set up information such as a bitmap of starting code units and a minimum
|
||||||
matching length. */
|
matching length. */
|
||||||
|
|
||||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||||
PRIV(study)(re) != 0)
|
PRIV(study)(re) != 0)
|
||||||
{
|
{
|
||||||
errorcode = ERR31;
|
errorcode = ERR31;
|
||||||
|
|
|
@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
|
||||||
its value gets changed by pcre2_internal.h to be in code units. */
|
its value gets changed by pcre2_internal.h to be in code units. */
|
||||||
|
|
||||||
static int configured_link_size = LINK_SIZE;
|
static int configured_link_size = LINK_SIZE;
|
||||||
|
@ -69,7 +69,7 @@ Arguments:
|
||||||
Returns: 0 if data returned
|
Returns: 0 if data returned
|
||||||
>= 0 if where is NULL, giving length required
|
>= 0 if where is NULL, giving length required
|
||||||
PCRE2_ERROR_BADOPTION if "where" not recognized
|
PCRE2_ERROR_BADOPTION if "where" not recognized
|
||||||
or JIT target requested when JIT not enabled
|
or JIT target requested when JIT not enabled
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -80,33 +80,33 @@ if (where == NULL) /* Requests a length */
|
||||||
switch(what)
|
switch(what)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
return PCRE2_ERROR_BADOPTION;
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
case PCRE2_CONFIG_BSR:
|
case PCRE2_CONFIG_BSR:
|
||||||
case PCRE2_CONFIG_JIT:
|
case PCRE2_CONFIG_JIT:
|
||||||
case PCRE2_CONFIG_LINKSIZE:
|
case PCRE2_CONFIG_LINKSIZE:
|
||||||
case PCRE2_CONFIG_NEWLINE:
|
case PCRE2_CONFIG_NEWLINE:
|
||||||
case PCRE2_CONFIG_STACKRECURSE:
|
case PCRE2_CONFIG_STACKRECURSE:
|
||||||
case PCRE2_CONFIG_UNICODE:
|
case PCRE2_CONFIG_UNICODE:
|
||||||
return sizeof(int);
|
return sizeof(int);
|
||||||
|
|
||||||
case PCRE2_CONFIG_MATCHLIMIT:
|
case PCRE2_CONFIG_MATCHLIMIT:
|
||||||
case PCRE2_CONFIG_PARENSLIMIT:
|
case PCRE2_CONFIG_PARENSLIMIT:
|
||||||
case PCRE2_CONFIG_RECURSIONLIMIT:
|
case PCRE2_CONFIG_RECURSIONLIMIT:
|
||||||
return sizeof(long int);
|
return sizeof(long int);
|
||||||
|
|
||||||
/* These are handled below */
|
/* These are handled below */
|
||||||
|
|
||||||
case PCRE2_CONFIG_JITTARGET:
|
case PCRE2_CONFIG_JITTARGET:
|
||||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
case PCRE2_CONFIG_VERSION:
|
case PCRE2_CONFIG_VERSION:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (what)
|
switch (what)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
return PCRE2_ERROR_BADOPTION;
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
case PCRE2_CONFIG_BSR:
|
case PCRE2_CONFIG_BSR:
|
||||||
|
@ -129,9 +129,9 @@ switch (what)
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
{
|
{
|
||||||
const char *v = PRIV(jit_get_target)();
|
const char *v = PRIV(jit_get_target)();
|
||||||
return (where == NULL)? (int)strlen(v) :
|
return (where == NULL)? (int)strlen(v) :
|
||||||
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
return PCRE2_ERROR_BADOPTION;
|
return PCRE2_ERROR_BADOPTION;
|
||||||
#endif
|
#endif
|
||||||
|
@ -163,9 +163,9 @@ switch (what)
|
||||||
*((int *)where) = 1;
|
*((int *)where) = 1;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PCRE2_CONFIG_UNICODE_VERSION:
|
case PCRE2_CONFIG_UNICODE_VERSION:
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UNICODE
|
#if defined SUPPORT_UNICODE
|
||||||
const char *v = PRIV(unicode_version);
|
const char *v = PRIV(unicode_version);
|
||||||
#else
|
#else
|
||||||
|
@ -183,15 +183,15 @@ switch (what)
|
||||||
*((int *)where) = 0;
|
*((int *)where) = 0;
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* The hackery in setting "v" below is to cope with the case when
|
/* The hackery in setting "v" below is to cope with the case when
|
||||||
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
|
||||||
If the second alternative is used in this case, it does not leave a space
|
If the second alternative is used in this case, it does not leave a space
|
||||||
before the date. On the other hand, if all four macros are put into a single
|
before the date. On the other hand, if all four macros are put into a single
|
||||||
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
|
||||||
There are problems using an "obvious" approach like this:
|
There are problems using an "obvious" approach like this:
|
||||||
|
|
||||||
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE2_MINOR)
|
||||||
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE2_DATE)
|
||||||
|
|
||||||
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
|
||||||
|
@ -199,18 +199,18 @@ switch (what)
|
||||||
argument consists of no preprocessing tokens, the behavior is undefined." It
|
argument consists of no preprocessing tokens, the behavior is undefined." It
|
||||||
turns out the gcc treats this case as a single empty string - which is what
|
turns out the gcc treats this case as a single empty string - which is what
|
||||||
we really want - but Visual C grumbles about the lack of an argument for the
|
we really want - but Visual C grumbles about the lack of an argument for the
|
||||||
macro. Unfortunately, both are within their rights. As there seems to be no
|
macro. Unfortunately, both are within their rights. As there seems to be no
|
||||||
way to test for a macro's value being empty at compile time, we have to
|
way to test for a macro's value being empty at compile time, we have to
|
||||||
resort to a runtime test. */
|
resort to a runtime test. */
|
||||||
|
|
||||||
case PCRE2_CONFIG_VERSION:
|
case PCRE2_CONFIG_VERSION:
|
||||||
{
|
{
|
||||||
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
|
||||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
|
||||||
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
|
||||||
return (where == NULL)? (int)strlen(v) :
|
return (where == NULL)? (int)strlen(v) :
|
||||||
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -72,15 +72,15 @@ free(block);
|
||||||
* Get a block and save memory control *
|
* Get a block and save memory control *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This internal function is called to get a block of memory in which the
|
/* This internal function is called to get a block of memory in which the
|
||||||
memory control data is to be stored at the start for future use.
|
memory control data is to be stored at the start for future use.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
size amount of memory required
|
size amount of memory required
|
||||||
memctl pointer to a memctl block or NULL
|
memctl pointer to a memctl block or NULL
|
||||||
|
|
||||||
Returns: pointer to memory or NULL on failure
|
Returns: pointer to memory or NULL on failure
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN void *
|
PCRE2_EXP_DEFN void *
|
||||||
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||||
|
@ -88,7 +88,7 @@ PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
|
||||||
pcre2_memctl *newmemctl;
|
pcre2_memctl *newmemctl;
|
||||||
void *yield = (memctl == NULL)? malloc(size) :
|
void *yield = (memctl == NULL)? malloc(size) :
|
||||||
memctl->malloc(size, memctl->memory_data);
|
memctl->malloc(size, memctl->memory_data);
|
||||||
if (yield == NULL) return NULL;
|
if (yield == NULL) return NULL;
|
||||||
newmemctl = (pcre2_memctl *)yield;
|
newmemctl = (pcre2_memctl *)yield;
|
||||||
if (memctl == NULL)
|
if (memctl == NULL)
|
||||||
{
|
{
|
||||||
|
@ -96,9 +96,9 @@ if (memctl == NULL)
|
||||||
newmemctl->free = default_free;
|
newmemctl->free = default_free;
|
||||||
newmemctl->memory_data = NULL;
|
newmemctl->memory_data = NULL;
|
||||||
}
|
}
|
||||||
else *newmemctl = *memctl;
|
else *newmemctl = *memctl;
|
||||||
return yield;
|
return yield;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,11 +108,11 @@ return yield;
|
||||||
|
|
||||||
/* Initializing for compile and match contexts is done in separate, private
|
/* Initializing for compile and match contexts is done in separate, private
|
||||||
functions so that these can be called from functions such as pcre2_compile()
|
functions so that these can be called from functions such as pcre2_compile()
|
||||||
when an external context is not supplied. The initializing functions have an
|
when an external context is not supplied. The initializing functions have an
|
||||||
option to set up default memory management. */
|
option to set up default memory management. */
|
||||||
|
|
||||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
|
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
|
||||||
void (*private_free)(void *, void *), void *memory_data)
|
void (*private_free)(void *, void *), void *memory_data)
|
||||||
{
|
{
|
||||||
pcre2_general_context *gcontext;
|
pcre2_general_context *gcontext;
|
||||||
|
@ -121,7 +121,7 @@ if (private_free == NULL) private_free = default_free;
|
||||||
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
|
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
|
||||||
if (gcontext == NULL) return NULL;
|
if (gcontext == NULL) return NULL;
|
||||||
gcontext->memctl.malloc = private_malloc;
|
gcontext->memctl.malloc = private_malloc;
|
||||||
gcontext->memctl.free = private_free;
|
gcontext->memctl.free = private_free;
|
||||||
gcontext->memctl.memory_data = memory_data;
|
gcontext->memctl.memory_data = memory_data;
|
||||||
return gcontext;
|
return gcontext;
|
||||||
}
|
}
|
||||||
|
@ -136,7 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = {
|
||||||
PRIV(default_tables),
|
PRIV(default_tables),
|
||||||
BSR_DEFAULT,
|
BSR_DEFAULT,
|
||||||
NEWLINE_DEFAULT,
|
NEWLINE_DEFAULT,
|
||||||
PARENS_NEST_LIMIT };
|
PARENS_NEST_LIMIT };
|
||||||
|
|
||||||
/* The create function copies the default into the new memory, but must
|
/* The create function copies the default into the new memory, but must
|
||||||
override the default memory handling functions if a gcontext was provided. */
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
@ -145,8 +145,8 @@ PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_compile_context_create(pcre2_general_context *gcontext)
|
pcre2_compile_context_create(pcre2_general_context *gcontext)
|
||||||
{
|
{
|
||||||
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
|
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
|
||||||
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
|
||||||
if (ccontext == NULL) return NULL;
|
if (ccontext == NULL) return NULL;
|
||||||
*ccontext = PRIV(default_compile_context);
|
*ccontext = PRIV(default_compile_context);
|
||||||
if (gcontext != NULL)
|
if (gcontext != NULL)
|
||||||
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
|
||||||
|
@ -159,14 +159,14 @@ when no context is supplied to a match function. */
|
||||||
|
|
||||||
const pcre2_match_context PRIV(default_match_context) = {
|
const pcre2_match_context PRIV(default_match_context) = {
|
||||||
{ default_malloc, default_free, NULL },
|
{ default_malloc, default_free, NULL },
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
{ default_malloc, default_free, NULL },
|
{ default_malloc, default_free, NULL },
|
||||||
#endif
|
#endif
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
MATCH_LIMIT,
|
MATCH_LIMIT,
|
||||||
MATCH_LIMIT_RECURSION };
|
MATCH_LIMIT_RECURSION };
|
||||||
|
|
||||||
/* The create function copies the default into the new memory, but must
|
/* The create function copies the default into the new memory, but must
|
||||||
override the default memory handling functions if a gcontext was provided. */
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
|
||||||
|
@ -174,8 +174,8 @@ PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_match_context_create(pcre2_general_context *gcontext)
|
pcre2_match_context_create(pcre2_general_context *gcontext)
|
||||||
{
|
{
|
||||||
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
|
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
|
||||||
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
|
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
|
||||||
if (mcontext == NULL) return NULL;
|
if (mcontext == NULL) return NULL;
|
||||||
*mcontext = PRIV(default_match_context);
|
*mcontext = PRIV(default_match_context);
|
||||||
if (gcontext != NULL)
|
if (gcontext != NULL)
|
||||||
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
|
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
|
||||||
|
@ -190,8 +190,8 @@ return mcontext;
|
||||||
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_general_context_copy(pcre2_general_context *gcontext)
|
pcre2_general_context_copy(pcre2_general_context *gcontext)
|
||||||
{
|
{
|
||||||
pcre2_general_context *new =
|
pcre2_general_context *new =
|
||||||
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
|
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
|
||||||
gcontext->memctl.memory_data);
|
gcontext->memctl.memory_data);
|
||||||
if (new == NULL) return NULL;
|
if (new == NULL) return NULL;
|
||||||
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
|
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
|
||||||
|
@ -202,8 +202,8 @@ return new;
|
||||||
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
|
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
|
||||||
{
|
{
|
||||||
pcre2_compile_context *new =
|
pcre2_compile_context *new =
|
||||||
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
|
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
|
||||||
ccontext->memctl.memory_data);
|
ccontext->memctl.memory_data);
|
||||||
if (new == NULL) return NULL;
|
if (new == NULL) return NULL;
|
||||||
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
|
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
|
||||||
|
@ -214,8 +214,8 @@ return new;
|
||||||
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
|
||||||
pcre2_match_context_copy(pcre2_match_context *mcontext)
|
pcre2_match_context_copy(pcre2_match_context *mcontext)
|
||||||
{
|
{
|
||||||
pcre2_match_context *new =
|
pcre2_match_context *new =
|
||||||
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
|
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
|
||||||
mcontext->memctl.memory_data);
|
mcontext->memctl.memory_data);
|
||||||
if (new == NULL) return NULL;
|
if (new == NULL) return NULL;
|
||||||
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
|
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
|
||||||
|
@ -267,14 +267,14 @@ data. */
|
||||||
/* ------------ Compile contexts ------------ */
|
/* ------------ Compile contexts ------------ */
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||||
const unsigned char *tables)
|
const unsigned char *tables)
|
||||||
{
|
{
|
||||||
ccontext->tables = tables;
|
ccontext->tables = tables;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
|
||||||
{
|
{
|
||||||
switch(value)
|
switch(value)
|
||||||
|
@ -283,13 +283,13 @@ switch(value)
|
||||||
case PCRE2_BSR_UNICODE:
|
case PCRE2_BSR_UNICODE:
|
||||||
ccontext->bsr_convention = value;
|
ccontext->bsr_convention = value;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return PCRE2_ERROR_BADDATA;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||||
{
|
{
|
||||||
switch(newline)
|
switch(newline)
|
||||||
|
@ -301,10 +301,10 @@ switch(newline)
|
||||||
case PCRE2_NEWLINE_ANYCRLF:
|
case PCRE2_NEWLINE_ANYCRLF:
|
||||||
ccontext->newline_convention = newline;
|
ccontext->newline_convention = newline;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return PCRE2_ERROR_BADDATA;
|
return PCRE2_ERROR_BADDATA;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -315,7 +315,7 @@ return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard)(uint32_t))
|
int (*guard)(uint32_t))
|
||||||
{
|
{
|
||||||
ccontext->stack_guard = guard;
|
ccontext->stack_guard = guard;
|
||||||
|
@ -325,8 +325,8 @@ return 0;
|
||||||
|
|
||||||
/* ------------ Match contexts ------------ */
|
/* ------------ Match contexts ------------ */
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_callout(pcre2_match_context *mcontext,
|
pcre2_set_callout(pcre2_match_context *mcontext,
|
||||||
int (*callout)(pcre2_callout_block *), void *callout_data)
|
int (*callout)(pcre2_callout_block *), void *callout_data)
|
||||||
{
|
{
|
||||||
mcontext->callout = callout;
|
mcontext->callout = callout;
|
||||||
|
@ -349,8 +349,8 @@ return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
|
||||||
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
|
||||||
void *mydata)
|
void *mydata)
|
||||||
{
|
{
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
|
@ -364,6 +364,6 @@ mcontext->stack_memctl.memory_data = mydata;
|
||||||
(void)mydata;
|
(void)mydata;
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre2_context.c */
|
/* End of pcre2_context.c */
|
||||||
|
|
|
@ -376,7 +376,7 @@ stateblock *next_active_state, *next_new_state;
|
||||||
|
|
||||||
const uint8_t *ctypes, *lcc, *fcc;
|
const uint8_t *ctypes, *lcc, *fcc;
|
||||||
PCRE2_SPTR ptr;
|
PCRE2_SPTR ptr;
|
||||||
PCRE2_SPTR end_code;
|
PCRE2_SPTR end_code;
|
||||||
PCRE2_SPTR first_op;
|
PCRE2_SPTR first_op;
|
||||||
|
|
||||||
dfa_recursion_info new_recursive;
|
dfa_recursion_info new_recursive;
|
||||||
|
@ -542,8 +542,8 @@ for (;;)
|
||||||
BOOL partial_newline = FALSE;
|
BOOL partial_newline = FALSE;
|
||||||
BOOL could_continue = reset_could_continue;
|
BOOL could_continue = reset_could_continue;
|
||||||
reset_could_continue = FALSE;
|
reset_could_continue = FALSE;
|
||||||
|
|
||||||
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
|
||||||
|
|
||||||
/* Make the new state list into the active state list and empty the
|
/* Make the new state list into the active state list and empty the
|
||||||
new state list. */
|
new state list. */
|
||||||
|
@ -633,7 +633,7 @@ for (;;)
|
||||||
|
|
||||||
/* If this opcode inspects a character, but we are at the end of the
|
/* If this opcode inspects a character, but we are at the end of the
|
||||||
subject, remember the fact for use when testing for a partial match. */
|
subject, remember the fact for use when testing for a partial match. */
|
||||||
|
|
||||||
if (clen == 0 && poptable[codevalue] != 0)
|
if (clen == 0 && poptable[codevalue] != 0)
|
||||||
could_continue = TRUE;
|
could_continue = TRUE;
|
||||||
|
|
||||||
|
@ -975,7 +975,7 @@ for (;;)
|
||||||
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
|
||||||
#endif
|
#endif
|
||||||
mb->last_used_ptr = temp;
|
mb->last_used_ptr = temp;
|
||||||
}
|
}
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
|
@ -2643,7 +2643,7 @@ for (;;)
|
||||||
|
|
||||||
if (condcode == OP_FALSE)
|
if (condcode == OP_FALSE)
|
||||||
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
|
||||||
|
|
||||||
/* There is also an always-true condition */
|
/* There is also an always-true condition */
|
||||||
|
|
||||||
if (condcode == OP_TRUE)
|
if (condcode == OP_TRUE)
|
||||||
|
@ -2999,7 +2999,7 @@ for (;;)
|
||||||
|
|
||||||
The "could_continue" variable is true if a state could have continued but
|
The "could_continue" variable is true if a state could have continued but
|
||||||
for the fact that the end of the subject was reached. */
|
for the fact that the end of the subject was reached. */
|
||||||
|
|
||||||
if (new_count <= 0)
|
if (new_count <= 0)
|
||||||
{
|
{
|
||||||
if (rlevel == 1 && /* Top level, and */
|
if (rlevel == 1 && /* Top level, and */
|
||||||
|
@ -3098,7 +3098,7 @@ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
|
||||||
/* Plausibility checks */
|
/* Plausibility checks */
|
||||||
|
|
||||||
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||||
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
|
||||||
return PCRE2_ERROR_NULL;
|
return PCRE2_ERROR_NULL;
|
||||||
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
|
||||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||||
|
@ -3127,19 +3127,19 @@ with different endianness. */
|
||||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||||
return PCRE2_ERROR_BADMODE;
|
return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||||
options variable for this function. Users of PCRE2 who are not calling the
|
options variable for this function. Users of PCRE2 who are not calling the
|
||||||
function directly would like to have a way of setting these flags, in the same
|
function directly would like to have a way of setting these flags, in the same
|
||||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
||||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||||
transferred to the options for this function. The bits are guaranteed to be
|
transferred to the options for this function. The bits are guaranteed to be
|
||||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||||
that the match-time bits are not more significant than the flag bits. If by
|
that the match-time bits are not more significant than the flag bits. If by
|
||||||
accident this is not the case, a compile-time division by zero error will
|
accident this is not the case, a compile-time division by zero error will
|
||||||
occur. */
|
occur. */
|
||||||
|
|
||||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||||
#undef FF
|
#undef FF
|
||||||
|
@ -3168,7 +3168,7 @@ end_subject = subject + length;
|
||||||
req_cu_ptr = start_match - 1;
|
req_cu_ptr = start_match - 1;
|
||||||
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
|
||||||
(re->overall_options & PCRE2_ANCHORED) != 0;
|
(re->overall_options & PCRE2_ANCHORED) != 0;
|
||||||
|
|
||||||
/* The "must be at the start of a line" flags are used in a loop when finding
|
/* The "must be at the start of a line" flags are used in a loop when finding
|
||||||
where to start. */
|
where to start. */
|
||||||
|
|
||||||
|
@ -3307,7 +3307,7 @@ for (;;)
|
||||||
/* There are some optimizations that avoid running the match if a known
|
/* There are some optimizations that avoid running the match if a known
|
||||||
starting point is not found, or if a known later code unit is not present.
|
starting point is not found, or if a known later code unit is not present.
|
||||||
However, there is an option (settable at compile time) that disables
|
However, there is an option (settable at compile time) that disables
|
||||||
these, for testing and for ensuring that all callouts do actually occur.
|
these, for testing and for ensuring that all callouts do actually occur.
|
||||||
The optimizations must also be avoided when restarting a DFA match. */
|
The optimizations must also be avoided when restarting a DFA match. */
|
||||||
|
|
||||||
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
|
||||||
|
@ -3493,7 +3493,7 @@ for (;;)
|
||||||
|
|
||||||
/* Anything other than "no match" means we are done, always; otherwise, carry
|
/* Anything other than "no match" means we are done, always; otherwise, carry
|
||||||
on only if not anchored. */
|
on only if not anchored. */
|
||||||
|
|
||||||
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
if (rc != PCRE2_ERROR_NOMATCH || anchored)
|
||||||
{
|
{
|
||||||
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
|
||||||
|
@ -3504,7 +3504,7 @@ for (;;)
|
||||||
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
|
||||||
match_data->rightchar = mb->last_used_ptr - subject;
|
match_data->rightchar = mb->last_used_ptr - subject;
|
||||||
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
|
||||||
match_data->rc = rc;
|
match_data->rc = rc;
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,7 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define STRING(a) # a
|
#define STRING(a) # a
|
||||||
#define XSTRING(s) STRING(s)
|
#define XSTRING(s) STRING(s)
|
||||||
|
|
||||||
/* The texts of compile-time error messages. Compile-time error numbers start
|
/* The texts of compile-time error messages. Compile-time error numbers start
|
||||||
at COMPILE_ERROR_BASE (100).
|
at COMPILE_ERROR_BASE (100).
|
||||||
|
|
||||||
Do not ever re-use any error number, because they are documented. Always add a
|
Do not ever re-use any error number, because they are documented. Always add a
|
||||||
|
@ -101,7 +101,7 @@ static const char compile_error_texts[] =
|
||||||
"(?R or (?[+-]digits must be followed by )\0"
|
"(?R or (?[+-]digits must be followed by )\0"
|
||||||
/* 30 */
|
/* 30 */
|
||||||
"unknown POSIX class name\0"
|
"unknown POSIX class name\0"
|
||||||
"internal error in pcre2_study(): should not occur\0"
|
"internal error in pcre2_study(): should not occur\0"
|
||||||
"this version of PCRE does not have UTF or Unicode property support\0"
|
"this version of PCRE does not have UTF or Unicode property support\0"
|
||||||
"parentheses are too deeply nested (stack check)\0"
|
"parentheses are too deeply nested (stack check)\0"
|
||||||
"character code point value in \\x{} or \\o{} is too large\0"
|
"character code point value in \\x{} or \\o{} is too large\0"
|
||||||
|
@ -158,94 +158,94 @@ static const char compile_error_texts[] =
|
||||||
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
|
||||||
"character code point value in \\u.... sequence is too large\0"
|
"character code point value in \\u.... sequence is too large\0"
|
||||||
"digits missing in \\x{} or \\o{}\0"
|
"digits missing in \\x{} or \\o{}\0"
|
||||||
"syntax error in (?(VERSION condition\0"
|
"syntax error in (?(VERSION condition\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
||||||
static const char match_error_texts[] =
|
static const char match_error_texts[] =
|
||||||
"no error\0"
|
"no error\0"
|
||||||
"no match\0"
|
"no match\0"
|
||||||
"partial match\0"
|
"partial match\0"
|
||||||
"UTF-8 error: 1 byte missing at end\0"
|
"UTF-8 error: 1 byte missing at end\0"
|
||||||
"UTF-8 error: 2 bytes missing at end\0"
|
"UTF-8 error: 2 bytes missing at end\0"
|
||||||
/* 5 */
|
/* 5 */
|
||||||
"UTF-8 error: 3 bytes missing at end\0"
|
"UTF-8 error: 3 bytes missing at end\0"
|
||||||
"UTF-8 error: 4 bytes missing at end\0"
|
"UTF-8 error: 4 bytes missing at end\0"
|
||||||
"UTF-8 error: 5 bytes missing at end\0"
|
"UTF-8 error: 5 bytes missing at end\0"
|
||||||
"UTF-8 error: byte 2 top bits not 0x80\0"
|
"UTF-8 error: byte 2 top bits not 0x80\0"
|
||||||
"UTF-8 error: byte 3 top bits not 0x80\0"
|
"UTF-8 error: byte 3 top bits not 0x80\0"
|
||||||
/* 10 */
|
/* 10 */
|
||||||
"UTF-8 error: byte 4 top bits not 0x80\0"
|
"UTF-8 error: byte 4 top bits not 0x80\0"
|
||||||
"UTF-8 error: byte 5 top bits not 0x80\0"
|
"UTF-8 error: byte 5 top bits not 0x80\0"
|
||||||
"UTF-8 error: byte 6 top bits not 0x80\0"
|
"UTF-8 error: byte 6 top bits not 0x80\0"
|
||||||
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
|
||||||
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
|
||||||
/* 15 */
|
/* 15 */
|
||||||
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
|
||||||
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
|
||||||
"UTF-8 error: overlong 2-byte sequence\0"
|
"UTF-8 error: overlong 2-byte sequence\0"
|
||||||
"UTF-8 error: overlong 3-byte sequence\0"
|
"UTF-8 error: overlong 3-byte sequence\0"
|
||||||
"UTF-8 error: overlong 4-byte sequence\0"
|
"UTF-8 error: overlong 4-byte sequence\0"
|
||||||
/* 20 */
|
/* 20 */
|
||||||
"UTF-8 error: overlong 5-byte sequence\0"
|
"UTF-8 error: overlong 5-byte sequence\0"
|
||||||
"UTF-8 error: overlong 6-byte sequence\0"
|
"UTF-8 error: overlong 6-byte sequence\0"
|
||||||
"UTF-8 error: isolated 0x80 byte\0"
|
"UTF-8 error: isolated 0x80 byte\0"
|
||||||
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
|
||||||
"UTF-16 error: missing low surrogate at end\0"
|
"UTF-16 error: missing low surrogate at end\0"
|
||||||
/* 25 */
|
/* 25 */
|
||||||
"UTF-16 error: invalid low surrogate\0"
|
"UTF-16 error: invalid low surrogate\0"
|
||||||
"UTF-16 error: isolated low surrogate\0"
|
"UTF-16 error: isolated low surrogate\0"
|
||||||
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
|
||||||
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
|
||||||
"bad data value\0"
|
"bad data value\0"
|
||||||
/* 30 */
|
/* 30 */
|
||||||
"bad length\0"
|
"bad length\0"
|
||||||
"magic number missing\0"
|
"magic number missing\0"
|
||||||
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
"pattern compiled in wrong mode: 8/16/32-bit error\0"
|
||||||
"bad offset value\0"
|
"bad offset value\0"
|
||||||
"bad option value\0"
|
"bad option value\0"
|
||||||
/* 35 */
|
/* 35 */
|
||||||
"bad offset into UTF string\0"
|
"bad offset into UTF string\0"
|
||||||
"callout error code\0" /* Never returned by PCRE2 itself */
|
"callout error code\0" /* Never returned by PCRE2 itself */
|
||||||
"invalid data in workspace for DFA restart\0"
|
"invalid data in workspace for DFA restart\0"
|
||||||
"too much recursion for DFA matching\0"
|
"too much recursion for DFA matching\0"
|
||||||
"backreference condition or recursion test not supported for DFA matching\0"
|
"backreference condition or recursion test not supported for DFA matching\0"
|
||||||
/* 40 */
|
/* 40 */
|
||||||
"item unsupported for DFA matching\0"
|
"item unsupported for DFA matching\0"
|
||||||
"workspace size exceeded in DFA matching\0"
|
"workspace size exceeded in DFA matching\0"
|
||||||
"internal error - pattern overwritten?\0"
|
"internal error - pattern overwritten?\0"
|
||||||
"bad JIT option\0"
|
"bad JIT option\0"
|
||||||
"JIT stack limit reached\0"
|
"JIT stack limit reached\0"
|
||||||
/* 45 */
|
/* 45 */
|
||||||
"match limit exceeded\0"
|
"match limit exceeded\0"
|
||||||
"no more memory\0"
|
"no more memory\0"
|
||||||
"unknown or unset substring\0"
|
"unknown or unset substring\0"
|
||||||
"NULL argument passed\0"
|
"NULL argument passed\0"
|
||||||
"nested recursion at the same subject position\0"
|
"nested recursion at the same subject position\0"
|
||||||
/* 50 */
|
/* 50 */
|
||||||
"recursion limit exceeded\0"
|
"recursion limit exceeded\0"
|
||||||
"requested value is not set\0"
|
"requested value is not set\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Return error message *
|
* Return error message *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function copies an error message into a buffer whose units are of an
|
/* This function copies an error message into a buffer whose units are of an
|
||||||
appropriate width. Error numbers are positive for compile-time errors, and
|
appropriate width. Error numbers are positive for compile-time errors, and
|
||||||
negative for match-time errors (except for UTF errors), but the numbers are all
|
negative for match-time errors (except for UTF errors), but the numbers are all
|
||||||
distinct.
|
distinct.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
enumber error number
|
enumber error number
|
||||||
buffer where to put the message (zero terminated)
|
buffer where to put the message (zero terminated)
|
||||||
size size of the buffer
|
size size of the buffer
|
||||||
|
|
||||||
Returns: length of message if all is well
|
Returns: length of message if all is well
|
||||||
negative on error
|
negative on error
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
||||||
|
@ -260,23 +260,23 @@ if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||||
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
||||||
{
|
{
|
||||||
message = compile_error_texts;
|
message = compile_error_texts;
|
||||||
n = enumber - COMPILE_ERROR_BASE;
|
n = enumber - COMPILE_ERROR_BASE;
|
||||||
}
|
}
|
||||||
else /* Match or UTF error */
|
else /* Match or UTF error */
|
||||||
{
|
{
|
||||||
message = match_error_texts;
|
message = match_error_texts;
|
||||||
n = -enumber;
|
n = -enumber;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (; n > 0; n--)
|
for (; n > 0; n--)
|
||||||
{
|
{
|
||||||
while (*message++ != CHAR_NULL) {};
|
while (*message++ != CHAR_NULL) {};
|
||||||
if (*message == CHAR_NULL)
|
if (*message == CHAR_NULL)
|
||||||
{
|
{
|
||||||
sprintf(xbuff, "Internal error: no text for error %d", enumber);
|
sprintf(xbuff, "Internal error: no text for error %d", enumber);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; *message != 0; i++)
|
for (i = 0; *message != 0; i++)
|
||||||
{
|
{
|
||||||
|
@ -287,9 +287,9 @@ for (i = 0; *message != 0; i++)
|
||||||
}
|
}
|
||||||
buffer[i] = *message++;
|
buffer[i] = *message++;
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer[i] = 0;
|
buffer[i] = 0;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of pcre2_error.c */
|
/* End of pcre2_error.c */
|
||||||
|
|
|
@ -1553,11 +1553,11 @@ enum {
|
||||||
/* This is used to skip a subpattern with a {0} quantifier */
|
/* This is used to skip a subpattern with a {0} quantifier */
|
||||||
|
|
||||||
OP_SKIPZERO, /* 162 */
|
OP_SKIPZERO, /* 162 */
|
||||||
|
|
||||||
/* This is used to identify a DEFINE group during compilation so that it can
|
/* This is used to identify a DEFINE group during compilation so that it can
|
||||||
be checked for having only one branch. It is changed to OP_FALSE before
|
be checked for having only one branch. It is changed to OP_FALSE before
|
||||||
compilation finishes. */
|
compilation finishes. */
|
||||||
|
|
||||||
OP_DEFINE, /* 163 */
|
OP_DEFINE, /* 163 */
|
||||||
|
|
||||||
/* This is not an opcode, but is used to check that tables indexed by opcode
|
/* This is not an opcode, but is used to check that tables indexed by opcode
|
||||||
|
@ -1565,7 +1565,7 @@ enum {
|
||||||
some in the past. */
|
some in the past. */
|
||||||
|
|
||||||
OP_TABLE_LENGTH
|
OP_TABLE_LENGTH
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
|
||||||
|
@ -1708,7 +1708,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
|
||||||
1, 3, /* THEN, THEN_ARG */ \
|
1, 3, /* THEN, THEN_ARG */ \
|
||||||
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
|
||||||
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
|
||||||
1 /* DEFINE */
|
1 /* DEFINE */
|
||||||
|
|
||||||
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
|
||||||
|
|
||||||
|
@ -1830,9 +1830,9 @@ extern const uint8_t PRIV(ucd_stage1)[];
|
||||||
extern const uint16_t PRIV(ucd_stage2)[];
|
extern const uint16_t PRIV(ucd_stage2)[];
|
||||||
extern const uint32_t PRIV(ucp_gbtable)[];
|
extern const uint32_t PRIV(ucp_gbtable)[];
|
||||||
extern const uint32_t PRIV(ucp_gentype)[];
|
extern const uint32_t PRIV(ucp_gentype)[];
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
extern const int PRIV(ucp_typerange)[];
|
extern const int PRIV(ucp_typerange)[];
|
||||||
#endif
|
#endif
|
||||||
extern const char *PRIV(unicode_version);
|
extern const char *PRIV(unicode_version);
|
||||||
extern const ucp_type_table PRIV(utt)[];
|
extern const ucp_type_table PRIV(utt)[];
|
||||||
extern const char PRIV(utt_names)[];
|
extern const char PRIV(utt_names)[];
|
||||||
|
|
|
@ -39,16 +39,16 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
/* This module contains mode-dependent macro and structure definitions. The
|
/* This module contains mode-dependent macro and structure definitions. The
|
||||||
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
|
||||||
These mode-dependent items are kept in a separate file so that they can also be
|
These mode-dependent items are kept in a separate file so that they can also be
|
||||||
#included multiple times for different code unit widths by pcre2test in order
|
#included multiple times for different code unit widths by pcre2test in order
|
||||||
to have access to the hidden structures at all supported widths.
|
to have access to the hidden structures at all supported widths.
|
||||||
|
|
||||||
Some of the mode-dependent macros are required at different widths for
|
Some of the mode-dependent macros are required at different widths for
|
||||||
different parts of the pcre2test code (in particular, the included
|
different parts of the pcre2test code (in particular, the included
|
||||||
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
pcre2_printint.c file). We undefine them here so that they can be re-defined for
|
||||||
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
multiple inclusions. Not all of these are used in pcre2test, but it's easier
|
||||||
just to undefine them all. */
|
just to undefine them all. */
|
||||||
|
|
||||||
#undef ACROSSCHAR
|
#undef ACROSSCHAR
|
||||||
|
@ -93,7 +93,7 @@ request for an even bigger limit. For this reason, and also to make the code
|
||||||
easier to maintain, the storing and loading of offsets from the compiled code
|
easier to maintain, the storing and loading of offsets from the compiled code
|
||||||
unit string is now handled by the macros that are defined here.
|
unit string is now handled by the macros that are defined here.
|
||||||
|
|
||||||
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
|
||||||
values of 3 or 4 are also supported. */
|
values of 3 or 4 are also supported. */
|
||||||
|
|
||||||
/* ------------------- 8-bit support ------------------ */
|
/* ------------------- 8-bit support ------------------ */
|
||||||
|
@ -173,14 +173,14 @@ values of 2 or 4 are also supported. */
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#error Unsupported compiling mode
|
#error Unsupported compiling mode
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/* --------------- Other mode-specific macros ----------------- */
|
/* --------------- Other mode-specific macros ----------------- */
|
||||||
|
|
||||||
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
/* PCRE uses some other (at least) 16-bit quantities that do not change when
|
||||||
the size of offsets changes. These are used for repeat counts and for other
|
the size of offsets changes. These are used for repeat counts and for other
|
||||||
things such as capturing parenthesis numbers in back references.
|
things such as capturing parenthesis numbers in back references.
|
||||||
|
|
||||||
Define the number of code units required to hold a 16-bit count/offset, and
|
Define the number of code units required to hold a 16-bit count/offset, and
|
||||||
macros to load and store such a value. For reasons that I do not understand,
|
macros to load and store such a value. For reasons that I do not understand,
|
||||||
|
@ -196,7 +196,7 @@ arithmetic results in a signed value. Hence the cast. */
|
||||||
#else /* Code units are 16 or 32 bits */
|
#else /* Code units are 16 or 32 bits */
|
||||||
#define IMM2_SIZE 1
|
#define IMM2_SIZE 1
|
||||||
#define GET2(a,n) a[n]
|
#define GET2(a,n) a[n]
|
||||||
#define PUT2(a,n,d) a[n] = d
|
#define PUT2(a,n,d) a[n] = d
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
|
||||||
|
@ -346,7 +346,7 @@ because almost all calls are already within a block of UTF-8 only code. */
|
||||||
/* Same as above, but it allows a fully customizable form. */
|
/* Same as above, but it allows a fully customizable form. */
|
||||||
#define ACROSSCHAR(condition, eptr, action) \
|
#define ACROSSCHAR(condition, eptr, action) \
|
||||||
while((condition) && ((eptr) & 0xc0) == 0x80) action
|
while((condition) && ((eptr) & 0xc0) == 0x80) action
|
||||||
|
|
||||||
/* Deposit a character into memory, returning the number of code units. */
|
/* Deposit a character into memory, returning the number of code units. */
|
||||||
|
|
||||||
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
|
||||||
|
@ -545,10 +545,10 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
|
||||||
|
|
||||||
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
|
||||||
|
|
||||||
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
|
||||||
code that uses them is simpler because it assumes this. */
|
code that uses them is simpler because it assumes this. */
|
||||||
|
|
||||||
/* The real general context structure. At present it holds only data for custom
|
/* The real general context structure. At present it holds only data for custom
|
||||||
memory control. */
|
memory control. */
|
||||||
|
|
||||||
typedef struct pcre2_real_general_context {
|
typedef struct pcre2_real_general_context {
|
||||||
|
@ -572,9 +572,9 @@ typedef struct pcre2_real_match_context {
|
||||||
pcre2_memctl memctl;
|
pcre2_memctl memctl;
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
pcre2_memctl stack_memctl;
|
pcre2_memctl stack_memctl;
|
||||||
#endif
|
#endif
|
||||||
int (*callout)(pcre2_callout_block *);
|
int (*callout)(pcre2_callout_block *);
|
||||||
void *callout_data;
|
void *callout_data;
|
||||||
uint32_t match_limit;
|
uint32_t match_limit;
|
||||||
uint32_t recursion_limit;
|
uint32_t recursion_limit;
|
||||||
} pcre2_real_match_context;
|
} pcre2_real_match_context;
|
||||||
|
@ -584,9 +584,9 @@ typedef struct pcre2_real_match_context {
|
||||||
typedef struct pcre2_real_code {
|
typedef struct pcre2_real_code {
|
||||||
pcre2_memctl memctl; /* Memory control fields */
|
pcre2_memctl memctl; /* Memory control fields */
|
||||||
const uint8_t *tables; /* The character tables */
|
const uint8_t *tables; /* The character tables */
|
||||||
void *executable_jit; /* Pointer to JIT code */
|
void *executable_jit; /* Pointer to JIT code */
|
||||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||||
size_t blocksize; /* Total (bytes) that was malloc-ed */
|
size_t blocksize; /* Total (bytes) that was malloc-ed */
|
||||||
uint32_t magic_number; /* Paranoid and endianness check */
|
uint32_t magic_number; /* Paranoid and endianness check */
|
||||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||||
uint32_t overall_options; /* Options after processing the pattern */
|
uint32_t overall_options; /* Options after processing the pattern */
|
||||||
|
@ -596,10 +596,10 @@ typedef struct pcre2_real_code {
|
||||||
uint32_t first_codeunit; /* Starting code unit */
|
uint32_t first_codeunit; /* Starting code unit */
|
||||||
uint32_t last_codeunit; /* This codeunit must be seen */
|
uint32_t last_codeunit; /* This codeunit must be seen */
|
||||||
uint16_t bsr_convention; /* What \R matches */
|
uint16_t bsr_convention; /* What \R matches */
|
||||||
uint16_t newline_convention; /* What is a newline? */
|
uint16_t newline_convention; /* What is a newline? */
|
||||||
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
|
||||||
uint16_t minlength; /* Minimum length of match */
|
uint16_t minlength; /* Minimum length of match */
|
||||||
uint16_t top_bracket; /* Highest numbered group */
|
uint16_t top_bracket; /* Highest numbered group */
|
||||||
uint16_t top_backref; /* Highest numbered back reference */
|
uint16_t top_backref; /* Highest numbered back reference */
|
||||||
uint16_t name_entry_size; /* Size (code units) of table entries */
|
uint16_t name_entry_size; /* Size (code units) of table entries */
|
||||||
uint16_t name_count; /* Number of name entries in the table */
|
uint16_t name_count; /* Number of name entries in the table */
|
||||||
|
@ -614,10 +614,10 @@ typedef struct pcre2_real_match_data {
|
||||||
int rc; /* The return code from the match */
|
int rc; /* The return code from the match */
|
||||||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||||
PCRE2_SPTR mark; /* Pointer to last mark */
|
PCRE2_SPTR mark; /* Pointer to last mark */
|
||||||
uint16_t oveccount; /* Number of pairs */
|
uint16_t oveccount; /* Number of pairs */
|
||||||
PCRE2_SIZE ovector[1]; /* The first field */
|
PCRE2_SIZE ovector[1]; /* The first field */
|
||||||
} pcre2_real_match_data;
|
} pcre2_real_match_data;
|
||||||
|
|
||||||
|
|
||||||
|
@ -700,7 +700,7 @@ the system stack. */
|
||||||
typedef struct ovecsave_frame {
|
typedef struct ovecsave_frame {
|
||||||
struct ovecsave_frame *next; /* Next frame on free chain */
|
struct ovecsave_frame *next; /* Next frame on free chain */
|
||||||
PCRE2_SIZE saved_ovec[1]; /* First vector element */
|
PCRE2_SIZE saved_ovec[1]; /* First vector element */
|
||||||
} ovecsave_frame;
|
} ovecsave_frame;
|
||||||
|
|
||||||
/* Structure for items in a linked list that represents an explicit recursive
|
/* Structure for items in a linked list that represents an explicit recursive
|
||||||
call within the pattern; used by pcre2_match(). */
|
call within the pattern; used by pcre2_match(). */
|
||||||
|
@ -738,7 +738,7 @@ typedef struct match_block {
|
||||||
pcre2_memctl memctl; /* For general use */
|
pcre2_memctl memctl; /* For general use */
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
pcre2_memctl stack_memctl; /* For "stack" frames */
|
pcre2_memctl stack_memctl; /* For "stack" frames */
|
||||||
#endif
|
#endif
|
||||||
uint32_t match_call_count; /* As it says */
|
uint32_t match_call_count; /* As it says */
|
||||||
uint32_t match_limit; /* As it says */
|
uint32_t match_limit; /* As it says */
|
||||||
uint32_t match_limit_recursion; /* As it says */
|
uint32_t match_limit_recursion; /* As it says */
|
||||||
|
@ -763,7 +763,7 @@ typedef struct match_block {
|
||||||
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
PCRE2_SPTR start_match_ptr; /* Start of matched string */
|
||||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||||
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
|
||||||
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
|
||||||
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
|
||||||
|
@ -778,7 +778,7 @@ typedef struct match_block {
|
||||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
|
||||||
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
|
||||||
recursion_info *recursive; /* Linked list of recursion data */
|
recursion_info *recursive; /* Linked list of recursion data */
|
||||||
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
|
||||||
void *callout_data; /* To pass back to callouts */
|
void *callout_data; /* To pass back to callouts */
|
||||||
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
|
@ -795,7 +795,7 @@ typedef struct dfa_match_block {
|
||||||
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
PCRE2_SPTR start_subject ; /* Start of the subject string */
|
||||||
PCRE2_SPTR end_subject; /* End of subject string */
|
PCRE2_SPTR end_subject; /* End of subject string */
|
||||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||||
const uint8_t *tables; /* Character tables */
|
const uint8_t *tables; /* Character tables */
|
||||||
PCRE2_SIZE start_offset; /* The start offset value */
|
PCRE2_SIZE start_offset; /* The start offset value */
|
||||||
uint32_t moptions; /* Match options */
|
uint32_t moptions; /* Match options */
|
||||||
|
|
|
@ -72,9 +72,9 @@ Arguments:
|
||||||
length length of subject string (may contain binary zeros)
|
length length of subject string (may contain binary zeros)
|
||||||
start_offset where to start in the subject string
|
start_offset where to start in the subject string
|
||||||
options option bits
|
options option bits
|
||||||
match_data points to a match_data block
|
match_data points to a match_data block
|
||||||
mcontext points to a match context
|
mcontext points to a match context
|
||||||
jit_stack points to a JIT stack
|
jit_stack points to a JIT stack
|
||||||
|
|
||||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||||
= 0 => success, but ovector is not big enough
|
= 0 => success, but ovector is not big enough
|
||||||
|
|
|
@ -60,9 +60,9 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
|
||||||
/* This function builds a set of character tables for use by PCRE2 and returns
|
/* This function builds a set of character tables for use by PCRE2 and returns
|
||||||
a pointer to them. They are built using the ctype functions, and consequently
|
a pointer to them. They are built using the ctype functions, and consequently
|
||||||
their contents will depend upon the current locale setting. When compiled as
|
their contents will depend upon the current locale setting. When compiled as
|
||||||
part of the library, the store is obtained via a general context malloc, if
|
part of the library, the store is obtained via a general context malloc, if
|
||||||
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
|
||||||
program) malloc() is used, and the function has a different name so as not to
|
program) malloc() is used, and the function has a different name so as not to
|
||||||
clash with the prototype in pcre2.h.
|
clash with the prototype in pcre2.h.
|
||||||
|
|
||||||
Arguments: none when DFTABLES is defined
|
Arguments: none when DFTABLES is defined
|
||||||
|
|
|
@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
|
||||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
|
||||||
PCRE2_PARTIAL_SOFT)
|
PCRE2_PARTIAL_SOFT)
|
||||||
|
|
||||||
#define PUBLIC_JIT_MATCH_OPTIONS \
|
#define PUBLIC_JIT_MATCH_OPTIONS \
|
||||||
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
|
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
|
||||||
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
|
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
|
||||||
|
@ -125,24 +125,24 @@ ovector length is always a multiple of 3. */
|
||||||
/* This function is called only when it is known that the offset lies within
|
/* This function is called only when it is known that the offset lies within
|
||||||
the offsets that have so far been used in the match. Note that in caseless
|
the offsets that have so far been used in the match. Note that in caseless
|
||||||
UTF-8 mode, the number of subject bytes matched may be different to the number
|
UTF-8 mode, the number of subject bytes matched may be different to the number
|
||||||
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
|
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
|
||||||
seems unlikely.)
|
seems unlikely.)
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
offset index into the offset vector
|
offset index into the offset vector
|
||||||
offset_top top of the used offset vector
|
offset_top top of the used offset vector
|
||||||
eptr pointer into the subject
|
eptr pointer into the subject
|
||||||
mb points to match block
|
mb points to match block
|
||||||
caseless TRUE if caseless
|
caseless TRUE if caseless
|
||||||
lengthptr pointer for returning the length matched
|
lengthptr pointer for returning the length matched
|
||||||
|
|
||||||
Returns: = 0 successful match; number of code units matched is set
|
Returns: = 0 successful match; number of code units matched is set
|
||||||
< 0 no match
|
< 0 no match
|
||||||
> 0 partial match
|
> 0 partial match
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
|
||||||
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UNICODE
|
#if defined SUPPORT_UNICODE
|
||||||
|
@ -153,7 +153,7 @@ register PCRE2_SPTR p;
|
||||||
PCRE2_SIZE length;
|
PCRE2_SIZE length;
|
||||||
PCRE2_SPTR eptr_start = eptr;
|
PCRE2_SPTR eptr_start = eptr;
|
||||||
|
|
||||||
/* Deal with an unset group. The default is no match, but there is an option to
|
/* Deal with an unset group. The default is no match, but there is an option to
|
||||||
match an empty string. */
|
match an empty string. */
|
||||||
|
|
||||||
if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
|
if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
|
||||||
|
@ -164,7 +164,7 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
|
||||||
return 0; /* Match */
|
return 0; /* Match */
|
||||||
}
|
}
|
||||||
else return -1; /* No match */
|
else return -1; /* No match */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Separate the caseless and UTF cases for speed. */
|
/* Separate the caseless and UTF cases for speed. */
|
||||||
|
|
||||||
|
@ -217,7 +217,7 @@ if (caseless)
|
||||||
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
if (eptr >= mb->end_subject) return 1; /* Partial match */
|
||||||
cc = UCHAR21TEST(eptr);
|
cc = UCHAR21TEST(eptr);
|
||||||
cp = UCHAR21TEST(p);
|
cp = UCHAR21TEST(p);
|
||||||
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
|
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
|
||||||
return -1; /* No match */
|
return -1; /* No match */
|
||||||
p++;
|
p++;
|
||||||
eptr++;
|
eptr++;
|
||||||
|
@ -345,7 +345,7 @@ argument of match(), which never changes. */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Structure for remembering the local variables in a private frame. Arrange it
|
/* Structure for remembering the local variables in a private frame. Arrange it
|
||||||
so as to minimize the number of holes. */
|
so as to minimize the number of holes. */
|
||||||
|
|
||||||
typedef struct heapframe {
|
typedef struct heapframe {
|
||||||
|
@ -364,7 +364,7 @@ typedef struct heapframe {
|
||||||
PCRE2_SPTR Xpp;
|
PCRE2_SPTR Xpp;
|
||||||
PCRE2_SPTR Xprev;
|
PCRE2_SPTR Xprev;
|
||||||
PCRE2_SPTR Xsaved_eptr;
|
PCRE2_SPTR Xsaved_eptr;
|
||||||
|
|
||||||
eptrblock *Xeptrb;
|
eptrblock *Xeptrb;
|
||||||
|
|
||||||
PCRE2_SIZE Xlength;
|
PCRE2_SIZE Xlength;
|
||||||
|
@ -377,7 +377,7 @@ typedef struct heapframe {
|
||||||
uint32_t Xrdepth;
|
uint32_t Xrdepth;
|
||||||
uint32_t Xop;
|
uint32_t Xop;
|
||||||
uint32_t Xsave_capture_last;
|
uint32_t Xsave_capture_last;
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
uint32_t Xprop_value;
|
uint32_t Xprop_value;
|
||||||
int Xprop_type;
|
int Xprop_type;
|
||||||
|
@ -401,7 +401,7 @@ typedef struct heapframe {
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
PCRE2_UCHAR Xocchars[6];
|
PCRE2_UCHAR Xocchars[6];
|
||||||
#endif
|
#endif
|
||||||
} heapframe;
|
} heapframe;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -414,9 +414,9 @@ typedef struct heapframe {
|
||||||
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
|
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
|
||||||
backtrack points by calling itself recursively in all but one case. The one
|
backtrack points by calling itself recursively in all but one case. The one
|
||||||
special case is when processing OP_RECURSE, which specifies recursion in the
|
special case is when processing OP_RECURSE, which specifies recursion in the
|
||||||
pattern. The entire ovector must be saved and restored while processing
|
pattern. The entire ovector must be saved and restored while processing
|
||||||
OP_RECURSE. If the ovector is small enough, instead of calling match()
|
OP_RECURSE. If the ovector is small enough, instead of calling match()
|
||||||
directly, op_recurse_ovecsave() is called. This function uses the system stack
|
directly, op_recurse_ovecsave() is called. This function uses the system stack
|
||||||
to save the ovector while calling match() to process the pattern recursion. */
|
to save the ovector while calling match() to process the pattern recursion. */
|
||||||
|
|
||||||
#ifndef HEAP_MATCH_RECURSE
|
#ifndef HEAP_MATCH_RECURSE
|
||||||
|
@ -425,7 +425,7 @@ to save the ovector while calling match() to process the pattern recursion. */
|
||||||
op_recurse_ovecsave(). */
|
op_recurse_ovecsave(). */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
|
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
|
||||||
|
|
||||||
|
|
||||||
|
@ -433,7 +433,7 @@ match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||||
* Process OP_RECURSE, stacking ovector *
|
* Process OP_RECURSE, stacking ovector *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* When this function is called, mb->recursive has already been updated to
|
/* When this function is called, mb->recursive has already been updated to
|
||||||
point to a new recursion data block, and all its fields other than ovec_save
|
point to a new recursion data block, and all its fields other than ovec_save
|
||||||
have been set.
|
have been set.
|
||||||
|
|
||||||
|
@ -447,9 +447,9 @@ Arguments:
|
||||||
eptrb pointer to chain of blocks containing eptr at start of
|
eptrb pointer to chain of blocks containing eptr at start of
|
||||||
brackets - for testing for empty matches
|
brackets - for testing for empty matches
|
||||||
rdepth the recursion depth
|
rdepth the recursion depth
|
||||||
|
|
||||||
Returns: a match() return code
|
Returns: a match() return code
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
|
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
|
||||||
|
@ -472,7 +472,7 @@ data and the last captured value. */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
|
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
|
||||||
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
|
||||||
mb, eptrb, rdepth + 1);
|
mb, eptrb, rdepth + 1);
|
||||||
memcpy(mb->ovector, new_recursive->ovec_save,
|
memcpy(mb->ovector, new_recursive->ovec_save,
|
||||||
mb->offset_end * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
|
@ -560,7 +560,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
|
||||||
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
|
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
|
||||||
{
|
{
|
||||||
/* These variables do not need to be preserved over recursion in this function,
|
/* These variables do not need to be preserved over recursion in this function,
|
||||||
|
@ -1382,10 +1382,10 @@ for (;;)
|
||||||
|
|
||||||
case OP_FALSE:
|
case OP_FALSE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_TRUE:
|
case OP_TRUE:
|
||||||
condition = TRUE;
|
condition = TRUE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* The condition is an assertion. Call match() to evaluate it - setting
|
/* The condition is an assertion. Call match() to evaluate it - setting
|
||||||
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
|
||||||
|
@ -1475,7 +1475,7 @@ for (;;)
|
||||||
update the last used pointer. */
|
update the last used pointer. */
|
||||||
|
|
||||||
case OP_ASSERT_ACCEPT:
|
case OP_ASSERT_ACCEPT:
|
||||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||||
|
|
||||||
case OP_ACCEPT:
|
case OP_ACCEPT:
|
||||||
case OP_END:
|
case OP_END:
|
||||||
|
@ -1735,7 +1735,7 @@ for (;;)
|
||||||
|
|
||||||
case OP_RECURSE:
|
case OP_RECURSE:
|
||||||
{
|
{
|
||||||
ovecsave_frame *fr;
|
ovecsave_frame *fr;
|
||||||
recursion_info *ri;
|
recursion_info *ri;
|
||||||
uint32_t recno;
|
uint32_t recno;
|
||||||
|
|
||||||
|
@ -1762,15 +1762,15 @@ for (;;)
|
||||||
|
|
||||||
ecode += 1 + LINK_SIZE;
|
ecode += 1 + LINK_SIZE;
|
||||||
|
|
||||||
/* When we are using the system stack for match() recursion we can call a
|
/* When we are using the system stack for match() recursion we can call a
|
||||||
function that uses the system stack for preserving the ovector while
|
function that uses the system stack for preserving the ovector while
|
||||||
processing the pattern recursion, but only if the ovector is small
|
processing the pattern recursion, but only if the ovector is small
|
||||||
enough. */
|
enough. */
|
||||||
|
|
||||||
#ifndef HEAP_MATCH_RECURSE
|
#ifndef HEAP_MATCH_RECURSE
|
||||||
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
|
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
|
||||||
{
|
{
|
||||||
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
|
||||||
eptrb, rdepth);
|
eptrb, rdepth);
|
||||||
mb->recursive = new_recursive.prevrec;
|
mb->recursive = new_recursive.prevrec;
|
||||||
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
|
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
|
||||||
|
@ -1785,10 +1785,10 @@ for (;;)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
/* If the ovector is too big, or if we are using the heap for match()
|
/* If the ovector is too big, or if we are using the heap for match()
|
||||||
recursion, we have to use the heap for saving the ovector. Used ovecsave
|
recursion, we have to use the heap for saving the ovector. Used ovecsave
|
||||||
frames are kept on a chain and re-used. This makes a small improvement in
|
frames are kept on a chain and re-used. This makes a small improvement in
|
||||||
execution time on Linux. */
|
execution time on Linux. */
|
||||||
|
|
||||||
if (mb->ovecsave_chain != NULL)
|
if (mb->ovecsave_chain != NULL)
|
||||||
{
|
{
|
||||||
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
|
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
|
||||||
|
@ -1800,17 +1800,17 @@ for (;;)
|
||||||
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
|
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
|
||||||
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
|
||||||
new_recursive.ovec_save = fr->saved_ovec;
|
new_recursive.ovec_save = fr->saved_ovec;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(new_recursive.ovec_save, mb->ovector,
|
memcpy(new_recursive.ovec_save, mb->ovector,
|
||||||
mb->offset_end * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
/* Do the recursion. After processing each alternative, restore the
|
/* Do the recursion. After processing each alternative, restore the
|
||||||
ovector data and the last captured value. This code has the same overall
|
ovector data and the last captured value. This code has the same overall
|
||||||
logic as the code in the op_recurse_ovecsave() function, but is adapted
|
logic as the code in the op_recurse_ovecsave() function, but is adapted
|
||||||
to use RMATCH/RRETURN and to release the heap block containing the saved
|
to use RMATCH/RRETURN and to release the heap block containing the saved
|
||||||
ovector. */
|
ovector. */
|
||||||
|
|
||||||
cbegroup = (*callpat >= OP_SBRA);
|
cbegroup = (*callpat >= OP_SBRA);
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -1821,51 +1821,51 @@ for (;;)
|
||||||
mb->offset_end * sizeof(PCRE2_SIZE));
|
mb->offset_end * sizeof(PCRE2_SIZE));
|
||||||
mb->capture_last = new_recursive.saved_capture_last;
|
mb->capture_last = new_recursive.saved_capture_last;
|
||||||
mb->recursive = new_recursive.prevrec;
|
mb->recursive = new_recursive.prevrec;
|
||||||
|
|
||||||
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
|
||||||
{
|
{
|
||||||
fr = (ovecsave_frame *)
|
fr = (ovecsave_frame *)
|
||||||
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||||
fr->next = mb->ovecsave_chain;
|
fr->next = mb->ovecsave_chain;
|
||||||
mb->ovecsave_chain = fr;
|
mb->ovecsave_chain = fr;
|
||||||
|
|
||||||
/* Set where we got to in the subject, and reset the start, in case
|
/* Set where we got to in the subject, and reset the start, in case
|
||||||
it was changed by \K. This *is* propagated back out of a recursion,
|
it was changed by \K. This *is* propagated back out of a recursion,
|
||||||
for Perl compatibility. */
|
for Perl compatibility. */
|
||||||
|
|
||||||
eptr = mb->end_match_ptr;
|
eptr = mb->end_match_ptr;
|
||||||
mstart = mb->start_match_ptr;
|
mstart = mb->start_match_ptr;
|
||||||
goto RECURSION_MATCHED; /* Exit loop; end processing */
|
goto RECURSION_MATCHED; /* Exit loop; end processing */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
|
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
|
||||||
recursion; they cause a NOMATCH for the entire recursion. These codes
|
recursion; they cause a NOMATCH for the entire recursion. These codes
|
||||||
are defined in a range that can be tested for. */
|
are defined in a range that can be tested for. */
|
||||||
|
|
||||||
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
|
||||||
{
|
{
|
||||||
rrc = MATCH_NOMATCH;
|
rrc = MATCH_NOMATCH;
|
||||||
goto RECURSION_RETURN;
|
goto RECURSION_RETURN;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Any return code other than NOMATCH is an error. */
|
/* Any return code other than NOMATCH is an error. */
|
||||||
|
|
||||||
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
|
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
|
||||||
mb->recursive = &new_recursive;
|
mb->recursive = &new_recursive;
|
||||||
callpat += GET(callpat, 1);
|
callpat += GET(callpat, 1);
|
||||||
}
|
}
|
||||||
while (*callpat == OP_ALT);
|
while (*callpat == OP_ALT);
|
||||||
|
|
||||||
RECURSION_RETURN:
|
RECURSION_RETURN:
|
||||||
mb->recursive = new_recursive.prevrec;
|
mb->recursive = new_recursive.prevrec;
|
||||||
fr = (ovecsave_frame *)
|
fr = (ovecsave_frame *)
|
||||||
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
|
||||||
fr->next = mb->ovecsave_chain;
|
fr->next = mb->ovecsave_chain;
|
||||||
mb->ovecsave_chain = fr;
|
mb->ovecsave_chain = fr;
|
||||||
RRETURN(rrc);
|
RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
RECURSION_MATCHED:
|
RECURSION_MATCHED:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* An alternation is the end of a branch; scan along to find the end of the
|
/* An alternation is the end of a branch; scan along to find the end of the
|
||||||
|
@ -1942,7 +1942,7 @@ for (;;)
|
||||||
mb->end_match_ptr = eptr; /* For ONCE_NC */
|
mb->end_match_ptr = eptr; /* For ONCE_NC */
|
||||||
mb->end_offset_top = offset_top;
|
mb->end_offset_top = offset_top;
|
||||||
mb->start_match_ptr = mstart;
|
mb->start_match_ptr = mstart;
|
||||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||||
RRETURN(MATCH_MATCH); /* Sets mb->mark */
|
RRETURN(MATCH_MATCH); /* Sets mb->mark */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1966,7 +1966,7 @@ for (;;)
|
||||||
{
|
{
|
||||||
mb->end_match_ptr = eptr;
|
mb->end_match_ptr = eptr;
|
||||||
mb->start_match_ptr = mstart;
|
mb->start_match_ptr = mstart;
|
||||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||||
RRETURN(MATCH_MATCH);
|
RRETURN(MATCH_MATCH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2010,7 +2010,7 @@ for (;;)
|
||||||
mb->start_match_ptr = mstart; /* In case \K reset it */
|
mb->start_match_ptr = mstart; /* In case \K reset it */
|
||||||
mb->end_match_ptr = eptr;
|
mb->end_match_ptr = eptr;
|
||||||
mb->end_offset_top = offset_top;
|
mb->end_offset_top = offset_top;
|
||||||
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
|
||||||
RRETURN(MATCH_KETRPOS);
|
RRETURN(MATCH_KETRPOS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2230,8 +2230,8 @@ for (;;)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
PCRE2_SPTR nextptr = eptr + 1;
|
PCRE2_SPTR nextptr = eptr + 1;
|
||||||
FORWARDCHARTEST(nextptr, mb->end_subject);
|
FORWARDCHARTEST(nextptr, mb->end_subject);
|
||||||
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
|
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
|
||||||
GETCHAR(c, eptr);
|
GETCHAR(c, eptr);
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
|
@ -2282,7 +2282,7 @@ for (;;)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if ((mb->poptions & PCRE2_UCP) != 0)
|
if ((mb->poptions & PCRE2_UCP) != 0)
|
||||||
{
|
{
|
||||||
|
@ -2297,7 +2297,7 @@ for (;;)
|
||||||
#endif
|
#endif
|
||||||
cur_is_word = MAX_255(*eptr)
|
cur_is_word = MAX_255(*eptr)
|
||||||
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
|
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now see if the situation is what we want */
|
/* Now see if the situation is what we want */
|
||||||
|
@ -2689,7 +2689,7 @@ for (;;)
|
||||||
|
|
||||||
/* Match a back reference, possibly repeatedly. Look past the end of the
|
/* Match a back reference, possibly repeatedly. Look past the end of the
|
||||||
item to see if there is repeat information following.
|
item to see if there is repeat information following.
|
||||||
|
|
||||||
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
|
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
|
||||||
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
|
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
|
||||||
and OP_DNREFI are used. In this case we must scan the list of groups to
|
and OP_DNREFI are used. In this case we must scan the list of groups to
|
||||||
|
@ -2705,7 +2705,7 @@ for (;;)
|
||||||
|
|
||||||
/* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
|
/* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
|
||||||
code. */
|
code. */
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
while (count-- > 0)
|
while (count-- > 0)
|
||||||
{
|
{
|
||||||
|
@ -2721,7 +2721,7 @@ for (;;)
|
||||||
caseless = op == OP_REFI;
|
caseless = op == OP_REFI;
|
||||||
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
|
||||||
ecode += 1 + IMM2_SIZE;
|
ecode += 1 + IMM2_SIZE;
|
||||||
|
|
||||||
/* Set up for repetition, or handle the non-repeated case */
|
/* Set up for repetition, or handle the non-repeated case */
|
||||||
|
|
||||||
REF_REPEAT:
|
REF_REPEAT:
|
||||||
|
@ -2750,7 +2750,7 @@ for (;;)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default: /* No repeat follows */
|
default: /* No repeat follows */
|
||||||
{
|
{
|
||||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
|
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
{
|
{
|
||||||
|
@ -2758,7 +2758,7 @@ for (;;)
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eptr += length;
|
eptr += length;
|
||||||
continue; /* With the main loop */
|
continue; /* With the main loop */
|
||||||
}
|
}
|
||||||
|
@ -2769,16 +2769,16 @@ for (;;)
|
||||||
also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
|
also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
|
||||||
group be have as a zero-length group. For any other unset cases, carrying
|
group be have as a zero-length group. For any other unset cases, carrying
|
||||||
on will result in NOMATCH. */
|
on will result in NOMATCH. */
|
||||||
|
|
||||||
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
|
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
|
||||||
{
|
{
|
||||||
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
|
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
|
||||||
}
|
}
|
||||||
else /* Group is not set */
|
else /* Group is not set */
|
||||||
{
|
{
|
||||||
if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
|
if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* First, ensure the minimum number of matches are present. We get back
|
/* First, ensure the minimum number of matches are present. We get back
|
||||||
the length of the reference string explicitly rather than passing the
|
the length of the reference string explicitly rather than passing the
|
||||||
|
@ -2787,7 +2787,7 @@ for (;;)
|
||||||
for (i = 1; i <= min; i++)
|
for (i = 1; i <= min; i++)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE slength;
|
PCRE2_SIZE slength;
|
||||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
{
|
{
|
||||||
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
||||||
|
@ -2808,13 +2808,13 @@ for (;;)
|
||||||
{
|
{
|
||||||
for (fi = min;; fi++)
|
for (fi = min;; fi++)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
PCRE2_SIZE slength;
|
PCRE2_SIZE slength;
|
||||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
|
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
if (fi >= max) RRETURN(MATCH_NOMATCH);
|
||||||
rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
{
|
{
|
||||||
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
if (rc > 0) eptr = mb->end_subject; /* Partial match */
|
||||||
CHECK_PARTIAL();
|
CHECK_PARTIAL();
|
||||||
|
@ -2825,12 +2825,12 @@ for (;;)
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If maximizing, find the longest string and work backwards, as long as
|
/* If maximizing, find the longest string and work backwards, as long as
|
||||||
the matched lengths for each iteration are the same. */
|
the matched lengths for each iteration are the same. */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
BOOL samelengths = TRUE;
|
BOOL samelengths = TRUE;
|
||||||
pp = eptr;
|
pp = eptr;
|
||||||
length = mb->ovector[offset+1] - mb->ovector[offset];
|
length = mb->ovector[offset+1] - mb->ovector[offset];
|
||||||
|
|
||||||
|
@ -2839,7 +2839,7 @@ for (;;)
|
||||||
PCRE2_SIZE slength;
|
PCRE2_SIZE slength;
|
||||||
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
|
||||||
|
|
||||||
if (rc != 0)
|
if (rc != 0)
|
||||||
{
|
{
|
||||||
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
|
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
|
||||||
the soft partial matching case. */
|
the soft partial matching case. */
|
||||||
|
@ -2857,14 +2857,14 @@ for (;;)
|
||||||
eptr += slength;
|
eptr += slength;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the length matched for each repetition is the same as the length of
|
/* If the length matched for each repetition is the same as the length of
|
||||||
the captured group, we can easily work backwards. This is the normal
|
the captured group, we can easily work backwards. This is the normal
|
||||||
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
|
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
|
||||||
characters whose lengths (in terms of code units) differ. However, this
|
characters whose lengths (in terms of code units) differ. However, this
|
||||||
is very rare, so we handle it by re-matching fewer and fewer times. */
|
is very rare, so we handle it by re-matching fewer and fewer times. */
|
||||||
|
|
||||||
if (samelengths)
|
if (samelengths)
|
||||||
{
|
{
|
||||||
while (eptr >= pp)
|
while (eptr >= pp)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
|
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
|
||||||
|
@ -2872,20 +2872,20 @@ for (;;)
|
||||||
eptr -= length;
|
eptr -= length;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The rare case of non-matching lengths. Re-scan the repetition for each
|
/* The rare case of non-matching lengths. Re-scan the repetition for each
|
||||||
iteration. We know that match_ref() will succeed every time. */
|
iteration. We know that match_ref() will succeed every time. */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
max = i;
|
max = i;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
|
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
|
||||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||||
if (eptr == pp) break; /* Failed after minimal repetition */
|
if (eptr == pp) break; /* Failed after minimal repetition */
|
||||||
eptr = pp;
|
eptr = pp;
|
||||||
max--;
|
max--;
|
||||||
for (i = min; i < max; i++)
|
for (i = min; i < max; i++)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE slength;
|
PCRE2_SIZE slength;
|
||||||
|
@ -2893,8 +2893,8 @@ for (;;)
|
||||||
eptr += slength;
|
eptr += slength;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
}
|
}
|
||||||
/* Control never gets here */
|
/* Control never gets here */
|
||||||
|
@ -6417,20 +6417,20 @@ with different endianness. */
|
||||||
|
|
||||||
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
|
||||||
return PCRE2_ERROR_BADMODE;
|
return PCRE2_ERROR_BADMODE;
|
||||||
|
|
||||||
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
|
||||||
options variable for this function. Users of PCRE2 who are not calling the
|
options variable for this function. Users of PCRE2 who are not calling the
|
||||||
function directly would like to have a way of setting these flags, in the same
|
function directly would like to have a way of setting these flags, in the same
|
||||||
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
|
||||||
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
|
||||||
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
|
||||||
transferred to the options for this function. The bits are guaranteed to be
|
transferred to the options for this function. The bits are guaranteed to be
|
||||||
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
adjacent, but do not have the same values. This bit of Boolean trickery assumes
|
||||||
that the match-time bits are not more significant than the flag bits. If by
|
that the match-time bits are not more significant than the flag bits. If by
|
||||||
accident this is not the case, a compile-time division by zero error will
|
accident this is not the case, a compile-time division by zero error will
|
||||||
occur. */
|
occur. */
|
||||||
|
|
||||||
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
|
||||||
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
|
||||||
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
|
||||||
#undef FF
|
#undef FF
|
||||||
|
@ -6541,7 +6541,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||||
mcontext->match_limit : re->limit_match;
|
mcontext->match_limit : re->limit_match;
|
||||||
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
|
||||||
mcontext->recursion_limit : re->limit_recursion;
|
mcontext->recursion_limit : re->limit_recursion;
|
||||||
|
|
||||||
/* Pointers to the individual character tables */
|
/* Pointers to the individual character tables */
|
||||||
|
|
||||||
mb->lcc = re->tables + lcc_offset;
|
mb->lcc = re->tables + lcc_offset;
|
||||||
|
@ -6580,7 +6580,7 @@ switch(re->newline_convention)
|
||||||
|
|
||||||
default: return PCRE2_ERROR_INTERNAL;
|
default: return PCRE2_ERROR_INTERNAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the expression has got more back references than the offsets supplied can
|
/* If the expression has got more back references than the offsets supplied can
|
||||||
hold, we get a temporary chunk of memory to use during the matching. Otherwise,
|
hold, we get a temporary chunk of memory to use during the matching. Otherwise,
|
||||||
we can use the vector supplied. The size of the ovector is three times the
|
we can use the vector supplied. The size of the ovector is three times the
|
||||||
|
@ -6854,7 +6854,7 @@ for(;;)
|
||||||
|
|
||||||
mb->start_match_ptr = start_match;
|
mb->start_match_ptr = start_match;
|
||||||
mb->start_used_ptr = start_match;
|
mb->start_used_ptr = start_match;
|
||||||
mb->last_used_ptr = start_match;
|
mb->last_used_ptr = start_match;
|
||||||
mb->match_call_count = 0;
|
mb->match_call_count = 0;
|
||||||
mb->match_function_type = 0;
|
mb->match_function_type = 0;
|
||||||
mb->end_offset_top = 0;
|
mb->end_offset_top = 0;
|
||||||
|
@ -6990,7 +6990,7 @@ while (mb->ovecsave_chain != NULL)
|
||||||
ovecsave_frame *this = mb->ovecsave_chain;
|
ovecsave_frame *this = mb->ovecsave_chain;
|
||||||
mb->ovecsave_chain = this->next;
|
mb->ovecsave_chain = this->next;
|
||||||
mb->memctl.free(this, mb->memctl.memory_data);
|
mb->memctl.free(this, mb->memctl.memory_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill in fields that are always returned in the match data. */
|
/* Fill in fields that are always returned in the match data. */
|
||||||
|
|
||||||
|
@ -7057,9 +7057,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
||||||
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
|
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
|
||||||
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
|
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the remaining returned values */
|
/* Set the remaining returned values */
|
||||||
|
|
||||||
match_data->startchar = start_match - subject;
|
match_data->startchar = start_match - subject;
|
||||||
match_data->leftchar = mb->start_used_ptr - subject;
|
match_data->leftchar = mb->start_used_ptr - subject;
|
||||||
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
|
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
|
||||||
|
@ -7068,7 +7068,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Control gets here if there has been a partial match, an error, or if the
|
/* Control gets here if there has been a partial match, an error, or if the
|
||||||
overall match attempt has failed at all permitted starting positions. Any mark
|
overall match attempt has failed at all permitted starting positions. Any mark
|
||||||
data is in the nomatch_mark field. */
|
data is in the nomatch_mark field. */
|
||||||
|
|
||||||
match_data->mark = mb->nomatch_mark;
|
match_data->mark = mb->nomatch_mark;
|
||||||
|
|
|
@ -72,10 +72,10 @@ return yield;
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
|
||||||
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
pcre2_match_data_create_from_pattern(pcre2_code *code,
|
||||||
pcre2_general_context *gcontext)
|
pcre2_general_context *gcontext)
|
||||||
{
|
{
|
||||||
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
||||||
gcontext);
|
gcontext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,7 +88,7 @@ return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
|
||||||
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
|
||||||
pcre2_match_data_free(pcre2_match_data *match_data)
|
pcre2_match_data_free(pcre2_match_data *match_data)
|
||||||
{
|
{
|
||||||
if (match_data != NULL)
|
if (match_data != NULL)
|
||||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -60,9 +60,9 @@ http://unicode.org/unicode/reports/tr18/. */
|
||||||
* Check for newline at given position *
|
* Check for newline at given position *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is called only via the IS_NEWLINE macro, which does so only
|
/* This function is called only via the IS_NEWLINE macro, which does so only
|
||||||
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
|
||||||
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
|
||||||
pointed to by ptr is less than the end of the string.
|
pointed to by ptr is less than the end of the string.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -76,7 +76,7 @@ Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
|
||||||
uint32_t *lenptr, BOOL utf)
|
uint32_t *lenptr, BOOL utf)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
|
@ -90,15 +90,15 @@ c = *ptr;
|
||||||
|
|
||||||
if (type == NLTYPE_ANYCRLF) switch(c)
|
if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
{
|
{
|
||||||
case CHAR_LF:
|
case CHAR_LF:
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
case CHAR_CR:
|
case CHAR_CR:
|
||||||
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,8 +111,8 @@ else switch(c)
|
||||||
#endif
|
#endif
|
||||||
case CHAR_LF:
|
case CHAR_LF:
|
||||||
case CHAR_VT:
|
case CHAR_VT:
|
||||||
case CHAR_FF:
|
case CHAR_FF:
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
case CHAR_CR:
|
case CHAR_CR:
|
||||||
|
@ -121,25 +121,25 @@ else switch(c)
|
||||||
|
|
||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
case CHAR_NEL:
|
case CHAR_NEL:
|
||||||
*lenptr = utf? 2 : 1;
|
*lenptr = utf? 2 : 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: /* PS */
|
case 0x2029: /* PS */
|
||||||
*lenptr = 3;
|
*lenptr = 3;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
#else /* 16-bit or 32-bit code units */
|
#else /* 16-bit or 32-bit code units */
|
||||||
case CHAR_NEL:
|
case CHAR_NEL:
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: /* PS */
|
case 0x2029: /* PS */
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
#endif
|
#endif
|
||||||
#endif /* Not EBCDIC */
|
#endif /* Not EBCDIC */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -166,7 +166,7 @@ Returns: TRUE or FALSE
|
||||||
*/
|
*/
|
||||||
|
|
||||||
BOOL
|
BOOL
|
||||||
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
|
||||||
uint32_t *lenptr, BOOL utf)
|
uint32_t *lenptr, BOOL utf)
|
||||||
{
|
{
|
||||||
uint32_t c;
|
uint32_t c;
|
||||||
|
@ -190,11 +190,11 @@ if (type == NLTYPE_ANYCRLF) switch(c)
|
||||||
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
case CHAR_CR:
|
case CHAR_CR:
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -211,31 +211,31 @@ else switch(c)
|
||||||
#endif
|
#endif
|
||||||
case CHAR_VT:
|
case CHAR_VT:
|
||||||
case CHAR_FF:
|
case CHAR_FF:
|
||||||
case CHAR_CR:
|
case CHAR_CR:
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
case CHAR_NEL:
|
case CHAR_NEL:
|
||||||
*lenptr = utf? 2 : 1;
|
*lenptr = utf? 2 : 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: /* PS */
|
case 0x2029: /* PS */
|
||||||
*lenptr = 3;
|
*lenptr = 3;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
|
||||||
#else /* 16-bit or 32-bit code units */
|
#else /* 16-bit or 32-bit code units */
|
||||||
case CHAR_NEL:
|
case CHAR_NEL:
|
||||||
case 0x2028: /* LS */
|
case 0x2028: /* LS */
|
||||||
case 0x2029: /* PS */
|
case 0x2029: /* PS */
|
||||||
*lenptr = 1;
|
*lenptr = 1;
|
||||||
return TRUE;
|
return TRUE;
|
||||||
#endif
|
#endif
|
||||||
#endif /* Not EBCDIC */
|
#endif /* Not EBCDIC */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -89,17 +89,17 @@ if (where == NULL) /* Requests field length */
|
||||||
case PCRE2_INFO_NAMECOUNT:
|
case PCRE2_INFO_NAMECOUNT:
|
||||||
case PCRE2_INFO_NEWLINE:
|
case PCRE2_INFO_NEWLINE:
|
||||||
case PCRE2_INFO_RECURSIONLIMIT:
|
case PCRE2_INFO_RECURSIONLIMIT:
|
||||||
return sizeof(uint32_t);
|
return sizeof(uint32_t);
|
||||||
|
|
||||||
case PCRE2_INFO_FIRSTBITMAP:
|
case PCRE2_INFO_FIRSTBITMAP:
|
||||||
return sizeof(const uint8_t *);
|
return sizeof(const uint8_t *);
|
||||||
|
|
||||||
case PCRE2_INFO_JITSIZE:
|
case PCRE2_INFO_JITSIZE:
|
||||||
case PCRE2_INFO_SIZE:
|
case PCRE2_INFO_SIZE:
|
||||||
return sizeof(size_t);
|
return sizeof(size_t);
|
||||||
|
|
||||||
case PCRE2_INFO_NAMETABLE:
|
case PCRE2_INFO_NAMETABLE:
|
||||||
return sizeof(PCRE2_SPTR);
|
return sizeof(PCRE2_SPTR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/* This module contains a PCRE private debugging function for printing out the
|
/* This module contains a PCRE private debugging function for printing out the
|
||||||
internal form of a compiled regular expression, along with some supporting
|
internal form of a compiled regular expression, along with some supporting
|
||||||
local functions. This source file is #included in pcre2test.c at each supported
|
local functions. This source file is #included in pcre2test.c at each supported
|
||||||
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
|
||||||
that comprise the library. */
|
that comprise the library. */
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,9 +82,9 @@ Arguments:
|
||||||
f file to write to
|
f file to write to
|
||||||
ptr pointer to first code unit of the character
|
ptr pointer to first code unit of the character
|
||||||
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
|
||||||
|
|
||||||
Returns: number of additional code units used
|
Returns: number of additional code units used
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static unsigned int
|
static unsigned int
|
||||||
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
|
||||||
|
@ -105,7 +105,7 @@ if (utf)
|
||||||
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
one_code_unit = (c & 0xfffff800u) != 0xd800u;
|
||||||
#endif /* CODE_UNIT_WIDTH */
|
#endif /* CODE_UNIT_WIDTH */
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* Handle a valid one-code-unit character at any width. */
|
/* Handle a valid one-code-unit character at any width. */
|
||||||
|
|
||||||
|
@ -115,10 +115,10 @@ if (one_code_unit)
|
||||||
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
else if (c < 0x80) fprintf(f, "\\x%02x", c);
|
||||||
else fprintf(f, "\\x{%02x}", c);
|
else fprintf(f, "\\x{%02x}", c);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
/* Code for invalid UTF code units and multi-unit UTF characters is different
|
||||||
for each width. If UTF is not supported, control should never get here, but we
|
for each width. If UTF is not supported, control should never get here, but we
|
||||||
need a return statement to keep the compiler happy. */
|
need a return statement to keep the compiler happy. */
|
||||||
|
|
||||||
#ifndef SUPPORT_UNICODE
|
#ifndef SUPPORT_UNICODE
|
||||||
|
@ -134,10 +134,10 @@ if ((c & 0xc0) != 0xc0)
|
||||||
{
|
{
|
||||||
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
||||||
int s = 6*a;
|
int s = 6*a;
|
||||||
c = (c & utf8_table3[a]) << s;
|
c = (c & utf8_table3[a]) << s;
|
||||||
|
@ -153,7 +153,7 @@ else
|
||||||
}
|
}
|
||||||
fprintf(f, "\\x{%x}", c);
|
fprintf(f, "\\x{%x}", c);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
|
||||||
|
|
||||||
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
|
||||||
|
@ -173,7 +173,7 @@ return 1;
|
||||||
/* For UTF-32 we get here only for a malformed code unit, which should only
|
/* For UTF-32 we get here only for a malformed code unit, which should only
|
||||||
occur if the sanity check has been turned off. Print it with \X instead of \x
|
occur if the sanity check has been turned off. Print it with \X instead of \x
|
||||||
as an indication. */
|
as an indication. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||||
fprintf(f, "\\X{%x}", c);
|
fprintf(f, "\\X{%x}", c);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -187,15 +187,15 @@ return 0;
|
||||||
* Print string as a list of code units *
|
* Print string as a list of code units *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This takes no account of UTF as it always prints each individual code unit.
|
/* This takes no account of UTF as it always prints each individual code unit.
|
||||||
The string is zero-terminated.
|
The string is zero-terminated.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
f file to write to
|
f file to write to
|
||||||
ptr point to the string
|
ptr point to the string
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_custring(FILE *f, PCRE2_SPTR ptr)
|
print_custring(FILE *f, PCRE2_SPTR ptr)
|
||||||
|
@ -213,9 +213,9 @@ while (*ptr != '\0')
|
||||||
* Find Unicode property name *
|
* Find Unicode property name *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* When there is no UTF/UCP support, the table of names does not exist. This
|
/* When there is no UTF/UCP support, the table of names does not exist. This
|
||||||
function should not be called in such configurations, because a pattern that
|
function should not be called in such configurations, because a pattern that
|
||||||
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
|
||||||
into the main code, however, we just put one into this function. */
|
into the main code, however, we just put one into this function. */
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
|
@ -244,15 +244,15 @@ return "??";
|
||||||
|
|
||||||
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
|
||||||
pseudo-property that contains a pointer to a list of case-equivalent
|
pseudo-property that contains a pointer to a list of case-equivalent
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
f file to write to
|
f file to write to
|
||||||
code pointer in the compiled code
|
code pointer in the compiled code
|
||||||
before text to print before
|
before text to print before
|
||||||
after text to print after
|
after text to print after
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -281,14 +281,14 @@ else
|
||||||
|
|
||||||
/* The print_lengths flag controls whether offsets and lengths of items are
|
/* The print_lengths flag controls whether offsets and lengths of items are
|
||||||
printed. Lengths can be turned off from pcre2test so that automatic tests on
|
printed. Lengths can be turned off from pcre2test so that automatic tests on
|
||||||
bytecode can be written that do not depend on the value of LINK_SIZE.
|
bytecode can be written that do not depend on the value of LINK_SIZE.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
re a compiled pattern
|
re a compiled pattern
|
||||||
f the file to write to
|
f the file to write to
|
||||||
print_lengths show various lengths
|
print_lengths show various lengths
|
||||||
|
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -460,7 +460,7 @@ for(;;)
|
||||||
case OP_TYPEMINQUERY:
|
case OP_TYPEMINQUERY:
|
||||||
case OP_TYPEPOSQUERY:
|
case OP_TYPEPOSQUERY:
|
||||||
fprintf(f, " %s ", flag);
|
fprintf(f, " %s ", flag);
|
||||||
|
|
||||||
if (*code >= OP_TYPESTAR)
|
if (*code >= OP_TYPESTAR)
|
||||||
{
|
{
|
||||||
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
|
||||||
|
|
|
@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* This module contains internal functions for comparing and finding the length
|
/* This module contains internal functions for comparing and finding the length
|
||||||
of strings. These are used instead of strcmp() etc because the standard
|
of strings. These are used instead of strcmp() etc because the standard
|
||||||
functions work only on 8-bit data. */
|
functions work only on 8-bit data. */
|
||||||
|
|
||||||
|
|
||||||
|
@ -54,7 +54,7 @@ functions work only on 8-bit data. */
|
||||||
* Compare two zero-terminated PCRE2 strings *
|
* Compare two zero-terminated PCRE2 strings *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Arguments:
|
Arguments:
|
||||||
str1 first string
|
str1 first string
|
||||||
str2 second string
|
str2 second string
|
||||||
|
@ -80,7 +80,7 @@ return 0;
|
||||||
* Compare zero-terminated PCRE2 & 8-bit strings *
|
* Compare zero-terminated PCRE2 & 8-bit strings *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||||
const char *.
|
const char *.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -108,7 +108,7 @@ return 0;
|
||||||
* Compare two PCRE2 strings, given a length *
|
* Compare two PCRE2 strings, given a length *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Arguments:
|
Arguments:
|
||||||
str1 first string
|
str1 first string
|
||||||
str2 second string
|
str2 second string
|
||||||
|
@ -135,7 +135,7 @@ return 0;
|
||||||
* Compare PCRE2 string to 8-bit string by length *
|
* Compare PCRE2 string to 8-bit string by length *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* As the 8-bit string is almost always a literal, its type is specified as
|
/* As the 8-bit string is almost always a literal, its type is specified as
|
||||||
const char *.
|
const char *.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -164,7 +164,7 @@ return 0;
|
||||||
* Find the length of a PCRE2 string *
|
* Find the length of a PCRE2 string *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Argument: the string
|
Argument: the string
|
||||||
Returns: the length
|
Returns: the length
|
||||||
*/
|
*/
|
||||||
|
@ -185,9 +185,9 @@ return c;
|
||||||
/* Arguments:
|
/* Arguments:
|
||||||
str1 buffer to receive the string
|
str1 buffer to receive the string
|
||||||
str2 8-bit string to be copied
|
str2 8-bit string to be copied
|
||||||
|
|
||||||
Returns: the number of code units used (excluding trailing zero)
|
Returns: the number of code units used (excluding trailing zero)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)
|
||||||
|
|
|
@ -74,7 +74,7 @@ Arguments:
|
||||||
code pointer to start of group (the bracket)
|
code pointer to start of group (the bracket)
|
||||||
startcode pointer to start of the whole pattern's code
|
startcode pointer to start of the whole pattern's code
|
||||||
recurse_depth RECURSE depth
|
recurse_depth RECURSE depth
|
||||||
utf UTF flag
|
utf UTF flag
|
||||||
|
|
||||||
Returns: the minimum length
|
Returns: the minimum length
|
||||||
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
|
||||||
|
@ -388,10 +388,10 @@ for (;;)
|
||||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||||
{
|
{
|
||||||
int count = GET2(cc, 1+IMM2_SIZE);
|
int count = GET2(cc, 1+IMM2_SIZE);
|
||||||
PCRE2_UCHAR *slot =
|
PCRE2_UCHAR *slot =
|
||||||
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||||
GET2(cc, 1) * re->name_entry_size;
|
GET2(cc, 1) * re->name_entry_size;
|
||||||
|
|
||||||
d = INT_MAX;
|
d = INT_MAX;
|
||||||
while (count-- > 0)
|
while (count-- > 0)
|
||||||
{
|
{
|
||||||
|
@ -579,7 +579,7 @@ for (;;)
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* Given a character, set its first code unit's bit in the table, and also the
|
/* Given a character, set its first code unit's bit in the table, and also the
|
||||||
corresponding bit for the other version of a letter if we are caseless.
|
corresponding bit for the other version of a letter if we are caseless.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
re points to the regex block
|
re points to the regex block
|
||||||
|
@ -590,20 +590,20 @@ Arguments:
|
||||||
Returns: pointer after the character
|
Returns: pointer after the character
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static PCRE2_SPTR
|
static PCRE2_SPTR
|
||||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
|
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
|
||||||
{
|
{
|
||||||
uint32_t c = *p++; /* First code unit */
|
uint32_t c = *p++; /* First code unit */
|
||||||
(void)utf; /* Stop compiler warning when UTF not supported */
|
(void)utf; /* Stop compiler warning when UTF not supported */
|
||||||
|
|
||||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||||
0xff. */
|
0xff. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
if (c > 0xff) SET_BIT(0xff); else
|
if (c > 0xff) SET_BIT(0xff); else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
SET_BIT(c);
|
SET_BIT(c);
|
||||||
|
|
||||||
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
|
||||||
the end of the character, even when caseless. */
|
the end of the character, even when caseless. */
|
||||||
|
@ -617,7 +617,7 @@ if (utf)
|
||||||
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
|
|
||||||
/* If caseless, handle the other case of the character. */
|
/* If caseless, handle the other case of the character. */
|
||||||
|
|
||||||
|
@ -669,7 +669,7 @@ static void
|
||||||
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||||
{
|
{
|
||||||
register uint32_t c;
|
register uint32_t c;
|
||||||
for (c = 0; c < table_limit; c++)
|
for (c = 0; c < table_limit; c++)
|
||||||
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (table_limit == 32) return;
|
if (table_limit == 32) return;
|
||||||
|
@ -710,7 +710,7 @@ static void
|
||||||
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
|
||||||
{
|
{
|
||||||
register uint32_t c;
|
register uint32_t c;
|
||||||
for (c = 0; c < table_limit; c++)
|
for (c = 0; c < table_limit; c++)
|
||||||
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||||
|
@ -724,10 +724,10 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function scans a compiled unanchored expression recursively and
|
/* This function scans a compiled unanchored expression recursively and
|
||||||
attempts to build a bitmap of the set of possible starting code units whose
|
attempts to build a bitmap of the set of possible starting code units whose
|
||||||
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
|
||||||
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
|
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
|
||||||
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
we pass a value of 16 rather than 32 as the final argument. (See comments in
|
||||||
those functions for the reason.)
|
those functions for the reason.)
|
||||||
|
|
||||||
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
||||||
|
@ -769,8 +769,8 @@ do
|
||||||
while (try_next) /* Loop for items in this branch */
|
while (try_next) /* Loop for items in this branch */
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
uint8_t *classmap = NULL;
|
uint8_t *classmap = NULL;
|
||||||
|
|
||||||
switch(*tcode)
|
switch(*tcode)
|
||||||
{
|
{
|
||||||
/* If we reach something we don't understand, it means a new opcode has
|
/* If we reach something we don't understand, it means a new opcode has
|
||||||
|
@ -854,31 +854,31 @@ do
|
||||||
case OP_THEN:
|
case OP_THEN:
|
||||||
case OP_THEN_ARG:
|
case OP_THEN_ARG:
|
||||||
return SSB_FAIL;
|
return SSB_FAIL;
|
||||||
|
|
||||||
/* A "real" property test implies no starting bits, but the fake property
|
/* A "real" property test implies no starting bits, but the fake property
|
||||||
PT_CLIST identifies a list of characters. These lists are short, as they
|
PT_CLIST identifies a list of characters. These lists are short, as they
|
||||||
are used for characters with more than one "other case", so there is no
|
are used for characters with more than one "other case", so there is no
|
||||||
point in recognizing them for OP_NOTPROP. */
|
point in recognizing them for OP_NOTPROP. */
|
||||||
|
|
||||||
case OP_PROP:
|
case OP_PROP:
|
||||||
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
if (tcode[1] != PT_CLIST) return SSB_FAIL;
|
||||||
{
|
{
|
||||||
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
|
||||||
while ((c = *p++) < NOTACHAR)
|
while ((c = *p++) < NOTACHAR)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR buff[6];
|
PCRE2_UCHAR buff[6];
|
||||||
(void)PRIV(ord2utf)(c, buff);
|
(void)PRIV(ord2utf)(c, buff);
|
||||||
c = buff[0];
|
c = buff[0];
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* We can ignore word boundary tests. */
|
/* We can ignore word boundary tests. */
|
||||||
|
|
||||||
|
@ -1032,14 +1032,14 @@ do
|
||||||
SET_BIT(CHAR_HT);
|
SET_BIT(CHAR_HT);
|
||||||
SET_BIT(CHAR_SPACE);
|
SET_BIT(CHAR_SPACE);
|
||||||
|
|
||||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||||
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
the bits for 0xA0 and for code units >= 255, independently of UTF. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
SET_BIT(0xA0);
|
SET_BIT(0xA0);
|
||||||
SET_BIT(0xFF);
|
SET_BIT(0xFF);
|
||||||
#else
|
#else
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of horizontal space characters. */
|
units of horizontal space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
|
@ -1052,7 +1052,7 @@ do
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||||
the code is EBCDIC. */
|
the code is EBCDIC. */
|
||||||
{
|
{
|
||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
|
@ -1060,7 +1060,7 @@ do
|
||||||
#endif /* Not EBCDIC */
|
#endif /* Not EBCDIC */
|
||||||
}
|
}
|
||||||
#endif /* 8-bit support */
|
#endif /* 8-bit support */
|
||||||
|
|
||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1071,16 +1071,16 @@ do
|
||||||
SET_BIT(CHAR_FF);
|
SET_BIT(CHAR_FF);
|
||||||
SET_BIT(CHAR_CR);
|
SET_BIT(CHAR_CR);
|
||||||
|
|
||||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
SET_BIT(CHAR_NEL);
|
SET_BIT(CHAR_NEL);
|
||||||
SET_BIT(0xFF);
|
SET_BIT(0xFF);
|
||||||
#else
|
#else
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of vertical space characters. */
|
units of vertical space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
|
@ -1093,8 +1093,8 @@ do
|
||||||
{
|
{
|
||||||
SET_BIT(CHAR_NEL);
|
SET_BIT(CHAR_NEL);
|
||||||
}
|
}
|
||||||
#endif /* 8-bit support */
|
#endif /* 8-bit support */
|
||||||
|
|
||||||
try_next = FALSE;
|
try_next = FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -1166,7 +1166,7 @@ do
|
||||||
case OP_ANY:
|
case OP_ANY:
|
||||||
case OP_ALLANY:
|
case OP_ALLANY:
|
||||||
return SSB_FAIL;
|
return SSB_FAIL;
|
||||||
|
|
||||||
case OP_HSPACE:
|
case OP_HSPACE:
|
||||||
SET_BIT(CHAR_HT);
|
SET_BIT(CHAR_HT);
|
||||||
SET_BIT(CHAR_SPACE);
|
SET_BIT(CHAR_SPACE);
|
||||||
|
@ -1178,7 +1178,7 @@ do
|
||||||
SET_BIT(0xA0);
|
SET_BIT(0xA0);
|
||||||
SET_BIT(0xFF);
|
SET_BIT(0xFF);
|
||||||
#else
|
#else
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of horizontal space characters. */
|
units of horizontal space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
|
@ -1191,7 +1191,7 @@ do
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
|
||||||
the code is EBCDIC. */
|
the code is EBCDIC. */
|
||||||
{
|
{
|
||||||
#ifndef EBCDIC
|
#ifndef EBCDIC
|
||||||
|
@ -1208,16 +1208,16 @@ do
|
||||||
SET_BIT(CHAR_FF);
|
SET_BIT(CHAR_FF);
|
||||||
SET_BIT(CHAR_CR);
|
SET_BIT(CHAR_CR);
|
||||||
|
|
||||||
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
|
||||||
the bits for NEL and for code units >= 255, independently of UTF. */
|
the bits for NEL and for code units >= 255, independently of UTF. */
|
||||||
|
|
||||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||||
SET_BIT(CHAR_NEL);
|
SET_BIT(CHAR_NEL);
|
||||||
SET_BIT(0xFF);
|
SET_BIT(0xFF);
|
||||||
#else
|
#else
|
||||||
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
|
||||||
units of vertical space characters. */
|
units of vertical space characters. */
|
||||||
|
|
||||||
#ifdef SUPPORT_UNICODE
|
#ifdef SUPPORT_UNICODE
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
|
@ -1230,7 +1230,7 @@ do
|
||||||
{
|
{
|
||||||
SET_BIT(CHAR_NEL);
|
SET_BIT(CHAR_NEL);
|
||||||
}
|
}
|
||||||
#endif /* 8-bit support */
|
#endif /* 8-bit support */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_NOT_DIGIT:
|
case OP_NOT_DIGIT:
|
||||||
|
@ -1260,8 +1260,8 @@ do
|
||||||
|
|
||||||
tcode += 2;
|
tcode += 2;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Extended class: if there are any property checks, or if this is a
|
/* Extended class: if there are any property checks, or if this is a
|
||||||
negative XCLASS without a map, give up. If there are no property checks,
|
negative XCLASS without a map, give up. If there are no property checks,
|
||||||
there must be wide characters on the XCLASS list, because otherwise an
|
there must be wide characters on the XCLASS list, because otherwise an
|
||||||
XCLASS would not have been created. This means that code points >= 255
|
XCLASS would not have been created. This means that code points >= 255
|
||||||
|
@ -1270,19 +1270,19 @@ do
|
||||||
#ifdef SUPPORT_WIDE_CHARS
|
#ifdef SUPPORT_WIDE_CHARS
|
||||||
case OP_XCLASS:
|
case OP_XCLASS:
|
||||||
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
|
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
|
||||||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
|
||||||
return SSB_FAIL;
|
return SSB_FAIL;
|
||||||
|
|
||||||
/* We have a positive XCLASS or a negative one with a map. Set up the
|
/* We have a positive XCLASS or a negative one with a map. Set up the
|
||||||
map pointer if there is one, and fall through. */
|
map pointer if there is one, and fall through. */
|
||||||
|
|
||||||
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
|
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
|
||||||
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
|
||||||
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
|
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
|
||||||
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
because it starts a character with a value > 255. In 8-bit non-UTF mode,
|
||||||
there is no difference between CLASS and NCLASS. In all other wide
|
there is no difference between CLASS and NCLASS. In all other wide
|
||||||
character modes, set the 0xFF bit to indicate code units >= 255. */
|
character modes, set the 0xFF bit to indicate code units >= 255. */
|
||||||
|
|
||||||
|
@ -1298,26 +1298,26 @@ do
|
||||||
#endif
|
#endif
|
||||||
/* Fall through */
|
/* Fall through */
|
||||||
|
|
||||||
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
/* Enter here for a positive non-XCLASS. If we have fallen through from
|
||||||
an XCLASS, classmap will already be set; just advance the code pointer.
|
an XCLASS, classmap will already be set; just advance the code pointer.
|
||||||
Otherwise, set up classmap for a non-XCLASS and advance past it. */
|
Otherwise, set up classmap for a non-XCLASS and advance past it. */
|
||||||
|
|
||||||
case OP_CLASS:
|
case OP_CLASS:
|
||||||
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
|
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
|
||||||
{
|
{
|
||||||
classmap = (uint8_t *)(++tcode);
|
classmap = (uint8_t *)(++tcode);
|
||||||
tcode += 32 / sizeof(PCRE2_UCHAR);
|
tcode += 32 / sizeof(PCRE2_UCHAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* When wide characters are supported, classmap may be NULL. In UTF-8
|
/* When wide characters are supported, classmap may be NULL. In UTF-8
|
||||||
(sic) mode, the bits in a class bit map correspond to character values,
|
(sic) mode, the bits in a class bit map correspond to character values,
|
||||||
not to byte values. However, the bit map we are constructing is for byte
|
not to byte values. However, the bit map we are constructing is for byte
|
||||||
values. So we have to do a conversion for characters whose code point is
|
values. So we have to do a conversion for characters whose code point is
|
||||||
greater than 127. In fact, there are only two possible starting bytes for
|
greater than 127. In fact, there are only two possible starting bytes for
|
||||||
characters in the range 128 - 255. */
|
characters in the range 128 - 255. */
|
||||||
|
|
||||||
if (classmap != NULL)
|
if (classmap != NULL)
|
||||||
{
|
{
|
||||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
|
@ -1334,11 +1334,11 @@ do
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
/* In all modes except UTF-8, the two bit maps are compatible. */
|
/* In all modes except UTF-8, the two bit maps are compatible. */
|
||||||
|
|
||||||
{
|
{
|
||||||
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
|
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Act on what follows the class. For a zero minimum repeat, continue;
|
/* Act on what follows the class. For a zero minimum repeat, continue;
|
||||||
|
@ -1384,13 +1384,13 @@ return yield;
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is handed a compiled expression that it must study to produce
|
/* This function is handed a compiled expression that it must study to produce
|
||||||
information that will speed up the matching.
|
information that will speed up the matching.
|
||||||
|
|
||||||
Argument: points to the compiled expression
|
Argument: points to the compiled expression
|
||||||
Returns: 0 normally; non-zero should never normally occur
|
Returns: 0 normally; non-zero should never normally occur
|
||||||
1 unknown opcode in set_start_bits
|
1 unknown opcode in set_start_bits
|
||||||
2 missing capturing bracket
|
2 missing capturing bracket
|
||||||
3 unknown opcode in find_minlength
|
3 unknown opcode in find_minlength
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -1402,7 +1402,7 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||||
|
|
||||||
/* Find start of compiled code */
|
/* Find start of compiled code */
|
||||||
|
|
||||||
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
|
||||||
re->name_entry_size * re->name_count;
|
re->name_entry_size * re->name_count;
|
||||||
|
|
||||||
/* For an anchored pattern, or an unanchored pattern that has a first code
|
/* For an anchored pattern, or an unanchored pattern that has a first code
|
||||||
|
@ -1422,17 +1422,17 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
|
||||||
switch(min = find_minlength(re, code, code, 0, utf))
|
switch(min = find_minlength(re, code, code, 0, utf))
|
||||||
{
|
{
|
||||||
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */
|
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case -2:
|
case -2:
|
||||||
return 2; /* missing capturing bracket */
|
return 2; /* missing capturing bracket */
|
||||||
|
|
||||||
case -3:
|
case -3:
|
||||||
return 3; /* unrecognized opcode */
|
return 3; /* unrecognized opcode */
|
||||||
|
|
||||||
default:
|
default:
|
||||||
re->minlength = min;
|
re->minlength = min;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -81,7 +81,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
||||||
{
|
{
|
||||||
uint16_t n = GET2(entry, 0);
|
uint16_t n = GET2(entry, 0);
|
||||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||||
}
|
}
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
}
|
}
|
||||||
|
@ -108,7 +108,7 @@ Returns: if successful: 0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
|
||||||
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE left, right;
|
PCRE2_SIZE left, right;
|
||||||
|
@ -119,7 +119,7 @@ if (stringnumber >= match_data->oveccount ||
|
||||||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
|
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
right = match_data->ovector[stringnumber*2+1];
|
right = match_data->ovector[stringnumber*2+1];
|
||||||
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||||
while (left < right) buffer[p++] = subject[left++];
|
while (left < right) buffer[p++] = subject[left++];
|
||||||
buffer[p] = 0;
|
buffer[p] = 0;
|
||||||
*sizeptr = p;
|
*sizeptr = p;
|
||||||
|
@ -140,7 +140,7 @@ Arguments:
|
||||||
match_data pointer to match_data
|
match_data pointer to match_data
|
||||||
stringname the name of the required substring
|
stringname the name of the required substring
|
||||||
stringptr where to put the pointer to the new memory
|
stringptr where to put the pointer to the new memory
|
||||||
sizeptr where to put the length of the substring
|
sizeptr where to put the length of the substring
|
||||||
|
|
||||||
Returns: if successful: zero
|
Returns: if successful: zero
|
||||||
if not successful, a negative value:
|
if not successful, a negative value:
|
||||||
|
@ -162,7 +162,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
||||||
{
|
{
|
||||||
uint16_t n = GET2(entry, 0);
|
uint16_t n = GET2(entry, 0);
|
||||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||||
}
|
}
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
}
|
}
|
||||||
|
@ -180,7 +180,7 @@ Arguments:
|
||||||
match_data points to match data
|
match_data points to match data
|
||||||
stringnumber the number of the required substring
|
stringnumber the number of the required substring
|
||||||
stringptr where to put a pointer to the new memory
|
stringptr where to put a pointer to the new memory
|
||||||
sizeptr where to put the size of the substring
|
sizeptr where to put the size of the substring
|
||||||
|
|
||||||
Returns: if successful: zero
|
Returns: if successful: zero
|
||||||
if not successful a negative error code:
|
if not successful a negative error code:
|
||||||
|
@ -189,7 +189,7 @@ Returns: if successful: zero
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
|
||||||
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE left, right;
|
PCRE2_SIZE left, right;
|
||||||
|
@ -204,8 +204,8 @@ if (stringnumber >= match_data->oveccount ||
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
right = match_data->ovector[stringnumber*2+1];
|
right = match_data->ovector[stringnumber*2+1];
|
||||||
|
|
||||||
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
||||||
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||||
if (block == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (block == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
|
||||||
yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl));
|
yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl));
|
||||||
|
@ -222,7 +222,7 @@ return 0;
|
||||||
* Free memory obtained by get_substring *
|
* Free memory obtained by get_substring *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Argument: the result of a previous pcre2_substring_get_byxxx()
|
Argument: the result of a previous pcre2_substring_get_byxxx()
|
||||||
Returns: nothing
|
Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
@ -246,7 +246,7 @@ permits duplicate names, the first substring that is set is chosen.
|
||||||
Arguments:
|
Arguments:
|
||||||
match_data pointer to match data
|
match_data pointer to match data
|
||||||
stringname the name of the required substring
|
stringname the name of the required substring
|
||||||
sizeptr where to put the length
|
sizeptr where to put the length
|
||||||
|
|
||||||
Returns: 0 if successful, else a negative error number
|
Returns: 0 if successful, else a negative error number
|
||||||
*/
|
*/
|
||||||
|
@ -265,7 +265,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
||||||
{
|
{
|
||||||
uint16_t n = GET2(entry, 0);
|
uint16_t n = GET2(entry, 0);
|
||||||
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
|
||||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||||
}
|
}
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
}
|
}
|
||||||
|
@ -281,7 +281,7 @@ return PCRE2_ERROR_NOSUBSTRING;
|
||||||
Arguments:
|
Arguments:
|
||||||
match_data pointer to match data
|
match_data pointer to match data
|
||||||
stringnumber the number of the required substring
|
stringnumber the number of the required substring
|
||||||
sizeptr where to put the length
|
sizeptr where to put the length
|
||||||
|
|
||||||
Returns: 0 if successful, else a negative error number
|
Returns: 0 if successful, else a negative error number
|
||||||
*/
|
*/
|
||||||
|
@ -296,7 +296,7 @@ if (stringnumber >= match_data->oveccount ||
|
||||||
return PCRE2_ERROR_NOSUBSTRING;
|
return PCRE2_ERROR_NOSUBSTRING;
|
||||||
*sizeptr = match_data->ovector[stringnumber*2 + 1] -
|
*sizeptr = match_data->ovector[stringnumber*2 + 1] -
|
||||||
match_data->ovector[stringnumber*2];
|
match_data->ovector[stringnumber*2];
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -307,23 +307,23 @@ return 0;
|
||||||
|
|
||||||
/* This function gets one chunk of memory and builds a list of pointers and all
|
/* This function gets one chunk of memory and builds a list of pointers and all
|
||||||
the captured substrings in it. A NULL pointer is put on the end of the list.
|
the captured substrings in it. A NULL pointer is put on the end of the list.
|
||||||
The substrings are zero-terminated, but also, if the final argument is
|
The substrings are zero-terminated, but also, if the final argument is
|
||||||
non-NULL, a list of lengths is also returned. This allows binary data to be
|
non-NULL, a list of lengths is also returned. This allows binary data to be
|
||||||
handled.
|
handled.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
match_data points to the match data
|
match_data points to the match data
|
||||||
listptr set to point to the list of pointers
|
listptr set to point to the list of pointers
|
||||||
lengthsptr set to point to the list of lengths (may be NULL)
|
lengthsptr set to point to the list of lengths (may be NULL)
|
||||||
|
|
||||||
Returns: if successful: 0
|
Returns: if successful: 0
|
||||||
if not successful, a negative error code:
|
if not successful, a negative error code:
|
||||||
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
PCRE2_ERROR_NOMEMORY: failed to get memory,
|
||||||
or a match failure code
|
or a match failure code
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
|
||||||
PCRE2_SIZE **lengthsptr)
|
PCRE2_SIZE **lengthsptr)
|
||||||
{
|
{
|
||||||
int i, count, count2;
|
int i, count, count2;
|
||||||
|
@ -343,22 +343,22 @@ if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
|
||||||
|
|
||||||
for (i = 0; i < count2; i += 2)
|
for (i = 0; i < count2; i += 2)
|
||||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1);
|
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1);
|
||||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
|
||||||
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
|
||||||
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
|
||||||
|
|
||||||
if (lengthsptr == NULL)
|
if (lengthsptr == NULL)
|
||||||
{
|
{
|
||||||
sp = (PCRE2_UCHAR *)lensp;
|
sp = (PCRE2_UCHAR *)lensp;
|
||||||
lensp = NULL;
|
lensp = NULL;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
*lengthsptr = lensp;
|
*lengthsptr = lensp;
|
||||||
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < count2; i += 2)
|
for (i = 0; i < count2; i += 2)
|
||||||
{
|
{
|
||||||
|
@ -398,9 +398,9 @@ memctl->free(memctl, memctl->memory_data);
|
||||||
* Find (multiple) entries for named string *
|
* Find (multiple) entries for named string *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function scans the nametable for a given name, using binary chop. It
|
/* This function scans the nametable for a given name, using binary chop. It
|
||||||
returns either two pointers to the entries in the table, or, if no pointers are
|
returns either two pointers to the entries in the table, or, if no pointers are
|
||||||
given, the number of a group with the given name. If duplicate names are
|
given, the number of a group with the given name. If duplicate names are
|
||||||
permitted, this may not be unique.
|
permitted, this may not be unique.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -428,11 +428,11 @@ while (top > bot)
|
||||||
uint16_t mid = (top + bot) / 2;
|
uint16_t mid = (top + bot) / 2;
|
||||||
PCRE2_SPTR entry = nametable + entrysize*mid;
|
PCRE2_SPTR entry = nametable + entrysize*mid;
|
||||||
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
|
||||||
if (c == 0)
|
if (c == 0)
|
||||||
{
|
{
|
||||||
PCRE2_SPTR first;
|
PCRE2_SPTR first;
|
||||||
PCRE2_SPTR last;
|
PCRE2_SPTR last;
|
||||||
PCRE2_SPTR lastentry;
|
PCRE2_SPTR lastentry;
|
||||||
if (firstptr == NULL) return GET2(entry, 0);
|
if (firstptr == NULL) return GET2(entry, 0);
|
||||||
lastentry = nametable + entrysize * (code->name_count - 1);
|
lastentry = nametable + entrysize * (code->name_count - 1);
|
||||||
first = last = entry;
|
first = last = entry;
|
||||||
|
@ -447,7 +447,7 @@ while (top > bot)
|
||||||
last += entrysize;
|
last += entrysize;
|
||||||
}
|
}
|
||||||
*firstptr = first;
|
*firstptr = first;
|
||||||
*lastptr = last;
|
*lastptr = last;
|
||||||
return entrysize;
|
return entrysize;
|
||||||
}
|
}
|
||||||
if (c > 0) bot = mid + 1; else top = mid;
|
if (c > 0) bot = mid + 1; else top = mid;
|
||||||
|
@ -462,7 +462,7 @@ return PCRE2_ERROR_NOSUBSTRING;
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
|
||||||
when it is known that names are unique. If there are duplicate names, it is not
|
when it is known that names are unique. If there are duplicate names, it is not
|
||||||
defined which number is returned.
|
defined which number is returned.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
@ -474,7 +474,7 @@ Returns: the number of the named parenthesis, or a negative number
|
||||||
*/
|
*/
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_substring_number_from_name(const pcre2_code *code,
|
pcre2_substring_number_from_name(const pcre2_code *code,
|
||||||
PCRE2_SPTR stringname)
|
PCRE2_SPTR stringname)
|
||||||
{
|
{
|
||||||
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
|
||||||
|
|
|
@ -232,7 +232,7 @@ enum {
|
||||||
ucp_Takri,
|
ucp_Takri,
|
||||||
/* New for Unicode 7.0.0: */
|
/* New for Unicode 7.0.0: */
|
||||||
ucp_Bassa_Vah,
|
ucp_Bassa_Vah,
|
||||||
ucp_Caucasian_Albanian,
|
ucp_Caucasian_Albanian,
|
||||||
ucp_Duployan,
|
ucp_Duployan,
|
||||||
ucp_Elbasan,
|
ucp_Elbasan,
|
||||||
ucp_Grantha,
|
ucp_Grantha,
|
||||||
|
|
|
@ -154,11 +154,11 @@ for (p = string; length-- > 0; p++)
|
||||||
*erroroffset = (int)(p - string);
|
*erroroffset = (int)(p - string);
|
||||||
switch(ab - length)
|
switch(ab - length)
|
||||||
{
|
{
|
||||||
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
case 1: return PCRE2_ERROR_UTF8_ERR1;
|
||||||
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
case 2: return PCRE2_ERROR_UTF8_ERR2;
|
||||||
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
case 3: return PCRE2_ERROR_UTF8_ERR3;
|
||||||
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
case 4: return PCRE2_ERROR_UTF8_ERR4;
|
||||||
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
case 5: return PCRE2_ERROR_UTF8_ERR5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
length -= ab; /* Length remaining */
|
length -= ab; /* Length remaining */
|
||||||
|
@ -314,7 +314,7 @@ return 0;
|
||||||
|
|
||||||
/* ----------------- Check a UTF-16 string ----------------- */
|
/* ----------------- Check a UTF-16 string ----------------- */
|
||||||
|
|
||||||
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
#elif PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
|
||||||
/* There's not so much work, nor so many errors, for UTF-16.
|
/* There's not so much work, nor so many errors, for UTF-16.
|
||||||
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string
|
||||||
|
|
|
@ -60,7 +60,7 @@ might contain codepoints above 255 and/or Unicode properties.
|
||||||
Arguments:
|
Arguments:
|
||||||
c the character
|
c the character
|
||||||
data points to the flag code unit of the XCLASS data
|
data points to the flag code unit of the XCLASS data
|
||||||
utf TRUE if in UTF mode
|
utf TRUE if in UTF mode
|
||||||
|
|
||||||
Returns: TRUE if character matches, else FALSE
|
Returns: TRUE if character matches, else FALSE
|
||||||
*/
|
*/
|
||||||
|
@ -261,7 +261,7 @@ while ((t = *data++) != XCL_END)
|
||||||
data += 2;
|
data += 2;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
(void)utf; /* Avoid compiler warning */
|
(void)utf; /* Avoid compiler warning */
|
||||||
#endif /* SUPPORT_UNICODE */
|
#endif /* SUPPORT_UNICODE */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
|
||||||
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
|
||||||
incompatible with the original PCRE API.
|
incompatible with the original PCRE API.
|
||||||
|
|
||||||
There are actually three libraries, each supporting a different code unit
|
There are actually three libraries, each supporting a different code unit
|
||||||
width. This demonstration program uses the 8-bit library.
|
width. This demonstration program uses the 8-bit library.
|
||||||
|
|
||||||
In Unix-like environments, if PCRE2 is installed in your standard system
|
In Unix-like environments, if PCRE2 is installed in your standard system
|
||||||
|
@ -39,8 +39,8 @@ the following line. */
|
||||||
|
|
||||||
/* #define PCRE2_STATIC */
|
/* #define PCRE2_STATIC */
|
||||||
|
|
||||||
/* This macro must be defined before including pcre2.h. For a program that uses
|
/* This macro must be defined before including pcre2.h. For a program that uses
|
||||||
only one code unit width, it makes it possible to use generic function names
|
only one code unit width, it makes it possible to use generic function names
|
||||||
such as pcre2_compile(). */
|
such as pcre2_compile(). */
|
||||||
|
|
||||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
@ -124,7 +124,7 @@ subject_length = strlen((char *)subject);
|
||||||
|
|
||||||
re = pcre2_compile(
|
re = pcre2_compile(
|
||||||
pattern, /* the pattern */
|
pattern, /* the pattern */
|
||||||
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
|
||||||
0, /* default options */
|
0, /* default options */
|
||||||
&errornumber, /* for error number */
|
&errornumber, /* for error number */
|
||||||
&erroroffset, /* for error offset */
|
&erroroffset, /* for error offset */
|
||||||
|
@ -134,9 +134,9 @@ re = pcre2_compile(
|
||||||
|
|
||||||
if (re == NULL)
|
if (re == NULL)
|
||||||
{
|
{
|
||||||
PCRE2_UCHAR buffer[256];
|
PCRE2_UCHAR buffer[256];
|
||||||
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
|
||||||
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
|
||||||
buffer);
|
buffer);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -180,7 +180,7 @@ if (rc < 0)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
/* Match succeeded. Get a pointer to the output vector, where string offsets are
|
||||||
stored. */
|
stored. */
|
||||||
|
|
||||||
ovector = pcre2_get_ovector_pointer(match_data);
|
ovector = pcre2_get_ovector_pointer(match_data);
|
||||||
|
@ -193,7 +193,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
|
||||||
* captured. *
|
* captured. *
|
||||||
*************************************************************************/
|
*************************************************************************/
|
||||||
|
|
||||||
/* The output vector wasn't big enough. This should not happen, because we used
|
/* The output vector wasn't big enough. This should not happen, because we used
|
||||||
pcre2_match_data_create_from_pattern() above. */
|
pcre2_match_data_create_from_pattern() above. */
|
||||||
|
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
|
@ -244,7 +244,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
&name_entry_size); /* where to put the answer */
|
&name_entry_size); /* where to put the answer */
|
||||||
|
|
||||||
/* Now we can scan the table and, for each entry, print the number, the name,
|
/* Now we can scan the table and, for each entry, print the number, the name,
|
||||||
and the substring itself. In the 8-bit library the number is held in two
|
and the substring itself. In the 8-bit library the number is held in two
|
||||||
bytes, most significant first. */
|
bytes, most significant first. */
|
||||||
|
|
||||||
tabptr = name_table;
|
tabptr = name_table;
|
||||||
|
@ -289,7 +289,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
|
||||||
|
|
||||||
if (!find_all) /* Check for -g */
|
if (!find_all) /* Check for -g */
|
||||||
{
|
{
|
||||||
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
pcre2_match_data_free(match_data); /* Release the memory that was used */
|
||||||
pcre2_code_free(re); /* for the match data and the pattern. */
|
pcre2_code_free(re); /* for the match data and the pattern. */
|
||||||
return 0; /* Exit the program. */
|
return 0; /* Exit the program. */
|
||||||
}
|
}
|
||||||
|
@ -307,7 +307,7 @@ sequence. */
|
||||||
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
|
||||||
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
|
||||||
newline == PCRE2_NEWLINE_CRLF ||
|
newline == PCRE2_NEWLINE_CRLF ||
|
||||||
newline == PCRE2_NEWLINE_ANYCRLF;
|
newline == PCRE2_NEWLINE_ANYCRLF;
|
||||||
|
|
||||||
/* Loop for second and subsequent matches */
|
/* Loop for second and subsequent matches */
|
||||||
|
|
||||||
|
|
|
@ -450,7 +450,7 @@ pcre2grep_exit(int rc)
|
||||||
if (resource_error)
|
if (resource_error)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
||||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||||
PCRE2_ERROR_RECURSIONLIMIT);
|
PCRE2_ERROR_RECURSIONLIMIT);
|
||||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||||
}
|
}
|
||||||
|
@ -485,7 +485,7 @@ if (strlen(s) > MAXPATLEN)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
|
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
|
||||||
MAXPATLEN);
|
MAXPATLEN);
|
||||||
free(p);
|
free(p);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
p->next = NULL;
|
p->next = NULL;
|
||||||
|
@ -2381,7 +2381,7 @@ switch(letter)
|
||||||
unsigned char buffer[128];
|
unsigned char buffer[128];
|
||||||
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
|
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
|
||||||
fprintf(stdout, "pcre2grep version %s\n", buffer);
|
fprintf(stdout, "pcre2grep version %s\n", buffer);
|
||||||
}
|
}
|
||||||
pcre2grep_exit(0);
|
pcre2grep_exit(0);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -2472,7 +2472,7 @@ if ((popts & PO_FIXED_STRINGS) != 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
|
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
|
||||||
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
|
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
|
||||||
compile_context);
|
compile_context);
|
||||||
if (p->compiled != NULL) return TRUE;
|
if (p->compiled != NULL) return TRUE;
|
||||||
|
|
||||||
|
@ -2555,11 +2555,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
|
||||||
afterwards, as a precaution against any later code trying to use it. */
|
afterwards, as a precaution against any later code trying to use it. */
|
||||||
|
|
||||||
*patlastptr = add_pattern(buffer, *patlastptr);
|
*patlastptr = add_pattern(buffer, *patlastptr);
|
||||||
if (*patlastptr == NULL)
|
if (*patlastptr == NULL)
|
||||||
{
|
{
|
||||||
if (f != stdin) fclose(f);
|
if (f != stdin) fclose(f);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
if (*patptr == NULL) *patptr = *patlastptr;
|
if (*patptr == NULL) *patptr = *patlastptr;
|
||||||
|
|
||||||
/* This loop is needed because compiling a "pattern" when -F is set may add
|
/* This loop is needed because compiling a "pattern" when -F is set may add
|
||||||
|
@ -2571,10 +2571,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
|
||||||
{
|
{
|
||||||
if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
|
if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
|
||||||
linenumber))
|
linenumber))
|
||||||
{
|
{
|
||||||
if (f != stdin) fclose(f);
|
if (f != stdin) fclose(f);
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
(*patlastptr)->string = NULL; /* Insurance */
|
(*patlastptr)->string = NULL; /* Insurance */
|
||||||
if ((*patlastptr)->next == NULL) break;
|
if ((*patlastptr)->next == NULL) break;
|
||||||
*patlastptr = (*patlastptr)->next;
|
*patlastptr = (*patlastptr)->next;
|
||||||
|
@ -2622,7 +2622,7 @@ for (i = 1; i < argc; i++)
|
||||||
char *option_data = (char *)""; /* default to keep compiler happy */
|
char *option_data = (char *)""; /* default to keep compiler happy */
|
||||||
BOOL longop;
|
BOOL longop;
|
||||||
BOOL longopwasequals = FALSE;
|
BOOL longopwasequals = FALSE;
|
||||||
|
|
||||||
if (argv[i][0] != '-') break;
|
if (argv[i][0] != '-') break;
|
||||||
|
|
||||||
/* If we hit an argument that is just "-", it may be a reference to STDIN,
|
/* If we hit an argument that is just "-", it may be a reference to STDIN,
|
||||||
|
@ -2925,7 +2925,7 @@ for (i = 1; i < argc; i++)
|
||||||
else *((int *)op->dataptr) = n;
|
else *((int *)op->dataptr) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Options have been decoded. If -C was used, its value is used as a default
|
/* Options have been decoded. If -C was used, its value is used as a default
|
||||||
for -A and -B. */
|
for -A and -B. */
|
||||||
|
|
||||||
|
@ -2946,15 +2946,15 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) ||
|
||||||
"and/or --line-offsets\n");
|
"and/or --line-offsets\n");
|
||||||
pcre2grep_exit(usage(2));
|
pcre2grep_exit(usage(2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Put limits into the match data block. */
|
/* Put limits into the match data block. */
|
||||||
|
|
||||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||||
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
|
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
|
||||||
|
|
||||||
if (only_matching != NULL || file_offsets || line_offsets)
|
if (only_matching != NULL || file_offsets || line_offsets)
|
||||||
show_only_matching = TRUE;
|
show_only_matching = TRUE;
|
||||||
|
|
||||||
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
/* If a locale has not been provided as an option, see if the LC_CTYPE or
|
||||||
LC_ALL environment variable is set, and if so, use it. */
|
LC_ALL environment variable is set, and if so, use it. */
|
||||||
|
|
||||||
|
@ -2980,7 +2980,7 @@ if (locale != NULL)
|
||||||
locale, locale_from);
|
locale, locale_from);
|
||||||
goto EXIT2;
|
goto EXIT2;
|
||||||
}
|
}
|
||||||
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
|
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sort out colouring */
|
/* Sort out colouring */
|
||||||
|
@ -3007,27 +3007,27 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
|
||||||
|
|
||||||
if (newline_arg != NULL)
|
if (newline_arg != NULL)
|
||||||
{
|
{
|
||||||
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
|
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
|
||||||
endlinetype++)
|
endlinetype++)
|
||||||
{
|
{
|
||||||
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
|
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
|
||||||
}
|
}
|
||||||
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
|
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
|
||||||
pcre2_set_newline(compile_context, endlinetype);
|
pcre2_set_newline(compile_context, endlinetype);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
|
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
|
||||||
newline_arg);
|
newline_arg);
|
||||||
goto EXIT2;
|
goto EXIT2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find default newline convention */
|
/* Find default newline convention */
|
||||||
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
|
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Interpret the text values for -d and -D */
|
/* Interpret the text values for -d and -D */
|
||||||
|
|
||||||
|
|
|
@ -68,7 +68,7 @@ already set. */
|
||||||
#include "pcre2_internal.h"
|
#include "pcre2_internal.h"
|
||||||
#include "pcre2posix.h"
|
#include "pcre2posix.h"
|
||||||
|
|
||||||
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
|
||||||
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
|
||||||
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
|
||||||
don't. */
|
don't. */
|
||||||
|
@ -89,7 +89,7 @@ static const int eint1[] = {
|
||||||
REG_ASSERT, /* internal error: unexpected repeat */
|
REG_ASSERT, /* internal error: unexpected repeat */
|
||||||
REG_BADPAT, /* unrecognized character after (? or (?- */
|
REG_BADPAT, /* unrecognized character after (? or (?- */
|
||||||
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
REG_BADPAT, /* POSIX named classes are supported only within a class */
|
||||||
REG_BADPAT, /* POSIX collating elements are not supported */
|
REG_BADPAT, /* POSIX collating elements are not supported */
|
||||||
REG_EPAREN, /* missing ) */
|
REG_EPAREN, /* missing ) */
|
||||||
/* 15 */
|
/* 15 */
|
||||||
REG_ESUBREG, /* reference to non-existent subpattern */
|
REG_ESUBREG, /* reference to non-existent subpattern */
|
||||||
|
@ -103,7 +103,7 @@ static const int eint1[] = {
|
||||||
REG_EPAREN, /* unmatched closing parenthesis */
|
REG_EPAREN, /* unmatched closing parenthesis */
|
||||||
REG_ASSERT /* internal error: code overflow */
|
REG_ASSERT /* internal error: code overflow */
|
||||||
};
|
};
|
||||||
|
|
||||||
static const int eint2[] = {
|
static const int eint2[] = {
|
||||||
30, REG_ECTYPE, /* unknown POSIX class name */
|
30, REG_ECTYPE, /* unknown POSIX class name */
|
||||||
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
|
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
|
||||||
|
@ -216,14 +216,14 @@ if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
|
||||||
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
|
||||||
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
|
||||||
|
|
||||||
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
|
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
|
||||||
&errorcode, &erroffset, NULL);
|
&errorcode, &erroffset, NULL);
|
||||||
preg->re_erroffset = erroffset;
|
preg->re_erroffset = erroffset;
|
||||||
|
|
||||||
if (preg->re_pcre2_code == NULL)
|
if (preg->re_pcre2_code == NULL)
|
||||||
{
|
{
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
if (errorcode < 0) return REG_BADPAT; /* UTF error */
|
||||||
errorcode -= COMPILE_ERROR_BASE;
|
errorcode -= COMPILE_ERROR_BASE;
|
||||||
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
|
||||||
return eint1[errorcode];
|
return eint1[errorcode];
|
||||||
|
@ -232,7 +232,7 @@ if (preg->re_pcre2_code == NULL)
|
||||||
return REG_BADPAT;
|
return REG_BADPAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
|
||||||
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
|
||||||
preg->re_nsub = (size_t)re_nsub;
|
preg->re_nsub = (size_t)re_nsub;
|
||||||
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
|
||||||
|
@ -288,7 +288,7 @@ else
|
||||||
eo = (int)strlen(string);
|
eo = (int)strlen(string);
|
||||||
}
|
}
|
||||||
|
|
||||||
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
|
||||||
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
|
||||||
|
|
||||||
/* Successful match */
|
/* Successful match */
|
||||||
|
|
|
@ -95,7 +95,7 @@ enum {
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
void *re_pcre2_code;
|
void *re_pcre2_code;
|
||||||
void *re_match_data;
|
void *re_match_data;
|
||||||
size_t re_nsub;
|
size_t re_nsub;
|
||||||
size_t re_erroffset;
|
size_t re_erroffset;
|
||||||
} regex_t;
|
} regex_t;
|
||||||
|
|
|
@ -4797,9 +4797,9 @@ for (gmatched = 0;; gmatched++)
|
||||||
PCRE2_SIZE length;
|
PCRE2_SIZE length;
|
||||||
uint32_t copybuffer[256];
|
uint32_t copybuffer[256];
|
||||||
int namelen = strlen((const char *)nptr);
|
int namelen = strlen((const char *)nptr);
|
||||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||||
PCRE2_SIZE cnl = namelen;
|
PCRE2_SIZE cnl = namelen;
|
||||||
#endif
|
#endif
|
||||||
if (namelen == 0) break;
|
if (namelen == 0) break;
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE2_8
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
@ -4864,9 +4864,9 @@ for (gmatched = 0;; gmatched++)
|
||||||
void *gotbuffer;
|
void *gotbuffer;
|
||||||
int rc;
|
int rc;
|
||||||
int namelen = strlen((const char *)nptr);
|
int namelen = strlen((const char *)nptr);
|
||||||
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
|
||||||
PCRE2_SIZE cnl = namelen;
|
PCRE2_SIZE cnl = namelen;
|
||||||
#endif
|
#endif
|
||||||
if (namelen == 0) break;
|
if (namelen == 0) break;
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE2_8
|
#ifdef SUPPORT_PCRE2_8
|
||||||
|
@ -5389,25 +5389,25 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the PCRE2 and Unicode version number and JIT target information, at the
|
/* Get the PCRE2 and Unicode version number and JIT target information, at the
|
||||||
same time checking that a request for the length gives the same answer. Also
|
same time checking that a request for the length gives the same answer. Also
|
||||||
check lengths for non-string items. */
|
check lengths for non-string items. */
|
||||||
|
|
||||||
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
|
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
|
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
|
||||||
|
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
|
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
|
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
|
||||||
|
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
|
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
|
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
|
||||||
|
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) ||
|
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) ||
|
||||||
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
|
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "** Error in pcre2_config(): bad length\n");
|
fprintf(stderr, "** Error in pcre2_config(): bad length\n");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get buffers from malloc() so that valgrind will check their misuse when
|
/* Get buffers from malloc() so that valgrind will check their misuse when
|
||||||
debugging. They grow automatically when very long lines are read. The 16-
|
debugging. They grow automatically when very long lines are read. The 16-
|
||||||
|
|
Loading…
Reference in New Issue