Tidy a lot of files (remove trailing spaces)

This commit is contained in:
Philip.Hazel 2014-10-20 17:28:49 +00:00
parent 4352f00bb9
commit c3799e750f
64 changed files with 1100 additions and 1113 deletions

View File

@ -382,21 +382,21 @@ SET(PCRE2_SOURCES
${PROJECT_BINARY_DIR}/pcre2_chartables.c ${PROJECT_BINARY_DIR}/pcre2_chartables.c
src/pcre2_compile.c src/pcre2_compile.c
src/pcre2_config.c src/pcre2_config.c
src/pcre2_context.c src/pcre2_context.c
src/pcre2_dfa_match.c src/pcre2_dfa_match.c
src/pcre2_error.c src/pcre2_error.c
src/pcre2_jit_compile.c src/pcre2_jit_compile.c
src/pcre2_jit_match.c src/pcre2_jit_match.c
src/pcre2_jit_misc.c src/pcre2_jit_misc.c
src/pcre2_maketables.c src/pcre2_maketables.c
src/pcre2_match.c src/pcre2_match.c
src/pcre2_match_data.c src/pcre2_match_data.c
src/pcre2_newline.c src/pcre2_newline.c
src/pcre2_ord2utf.c src/pcre2_ord2utf.c
src/pcre2_pattern_info.c src/pcre2_pattern_info.c
src/pcre2_string_utils.c src/pcre2_string_utils.c
src/pcre2_study.c src/pcre2_study.c
src/pcre2_substring.c src/pcre2_substring.c
src/pcre2_tables.c src/pcre2_tables.c
src/pcre2_ucd.c src/pcre2_ucd.c
src/pcre2_valid_utf.c src/pcre2_valid_utf.c
@ -462,11 +462,11 @@ SET(targets)
IF(PCRE2_BUILD_PCRE2_8) IF(PCRE2_BUILD_PCRE2_8)
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-8 SET_PROPERTY(TARGET pcre2-8
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2-8) SET(targets ${targets} pcre2-8)
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES}) ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
SET_PROPERTY(TARGET pcre2posix SET_PROPERTY(TARGET pcre2posix
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2posix) SET(targets ${targets} pcre2posix)
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8) TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
@ -503,7 +503,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32) IF(PCRE2_BUILD_PCRE2_32)
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h) ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-32 SET_PROPERTY(TARGET pcre2-32
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32) PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
SET(targets ${targets} pcre2-32) SET(targets ${targets} pcre2-32)
IF(MINGW AND NOT PCRE2_STATIC) IF(MINGW AND NOT PCRE2_STATIC)
@ -521,7 +521,7 @@ ENDIF(PCRE2_BUILD_PCRE2_32)
IF(PCRE2_BUILD_PCRE2GREP) IF(PCRE2_BUILD_PCRE2GREP)
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c) ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
SET_PROPERTY(TARGET pcre2grep SET_PROPERTY(TARGET pcre2grep
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8) PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2grep) SET(targets ${targets} pcre2grep)
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS}) TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
ENDIF(PCRE2_BUILD_PCRE2GREP) ENDIF(PCRE2_BUILD_PCRE2GREP)

View File

@ -5,41 +5,41 @@ Version 10.0 xx-xxxx-2014
------------------------- -------------------------
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library. Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
item 20 for release 8.36. item 20 for release 8.36.
The code of the library was heavily revised as part of the new API The code of the library was heavily revised as part of the new API
implementation. Details of each and every modification were not individually implementation. Details of each and every modification were not individually
logged. In addition to the API changes, the following changes were made. They logged. In addition to the API changes, the following changes were made. They
are either new functionality, or bug fixes and other noticeable changes of are either new functionality, or bug fixes and other noticeable changes of
behaviour that were implemented after the code had been forked. behaviour that were implemented after the code had been forked.
1. The test program, now called pcre2test, was re-specified and almost 1. The test program, now called pcre2test, was re-specified and almost
completely re-written. Its input is not compatible with input for pcretest. completely re-written. Its input is not compatible with input for pcretest.
2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the 2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
matched by that pattern. matched by that pattern.
3. For the benefit of those who use PCRE2 via some other application, that is, 3. For the benefit of those who use PCRE2 via some other application, that is,
not writing the function calls themselves, it is possible to check the PCRE2 not writing the function calls themselves, it is possible to check the PCRE2
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
string such as "yesno". string such as "yesno".
4. There are case-equivalent Unicode characters whose encodings use different 4. There are case-equivalent Unicode characters whose encodings use different
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
theoretically possible for this to happen in UTF-16 too.) If a backreference to theoretically possible for this to happen in UTF-16 too.) If a backreference to
a group containing one of these characters was greedily repeated, and during a group containing one of these characters was greedily repeated, and during
the match a backtrack occurred, the subject might be backtracked by the wrong the match a backtrack occurred, the subject might be backtracked by the wrong
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should (and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8. capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
Incorrect backtracking meant that group 2 captured only the last two bytes. Incorrect backtracking meant that group 2 captured only the last two bytes.
This bug has been fixed; the new code is slower, but it is used only when the This bug has been fixed; the new code is slower, but it is used only when the
strings matched by the repetition are not all the same length. strings matched by the repetition are not all the same length.
5. A pattern such as /()a/ was not setting the "first character must be 'a'" 5. A pattern such as /()a/ was not setting the "first character must be 'a'"
information. This applied to any pattern with a group that matched no information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/. characters, for example: /(?:(?=.)|(?<!x))a/.
**** ****

2
NEWS
View File

@ -5,7 +5,7 @@ Version 10.0 xx-xxxx-2014
------------------------- -------------------------
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library. Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
item 20 for release 8.36. item 20 for release 8.36.
**** ****

View File

@ -1,9 +1,9 @@
Building PCRE2 without using autotools Building PCRE2 without using autotools
-------------------------------------- --------------------------------------
This document has been converted from the PCRE1 document, but is not yet This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of various environments, as they applied only to PCRE1 and are probably out of
date. date.
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
environment. In particular, you can alter the definition of the NEWLINE environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line macro to specify what character(s) you want to be interpreted as line
terminators. terminators.
When you compile any of the PCRE2 modules, you must specify When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources. sources.
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
pcre2_chartables.c pcre2_chartables.c
pcre2_compile.c pcre2_compile.c
pcre2_config.c pcre2_config.c
pcre2_context.c pcre2_context.c
pcre2_dfa_match.c pcre2_dfa_match.c
pcre2_error.c pcre2_error.c
pcre2_jit_compile.c pcre2_jit_compile.c
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
pcre2_pattern_info.c pcre2_pattern_info.c
pcre2_string_utils.c pcre2_string_utils.c
pcre2_study.c pcre2_study.c
pcre2_substring.c pcre2_substring.c
pcre2_tables.c pcre2_tables.c
pcre2_ucd.c pcre2_ucd.c
pcre2_valid_utf.c pcre2_valid_utf.c
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or (6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling. -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the (7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcre2posix.h file and then 8-bit library), ensure that you have the pcre2posix.h file and then
compile pcre2posix.c. Link the result (on its own) as the pcre2posix compile pcre2posix.c. Link the result (on its own) as the pcre2posix
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build spaces in the names for your CMake installation and your PCRE2 source and build
directories. directories.
The following instructions were contributed by a PCRE1 user, but they should The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the before attempting to repeat the CMake build process. In the CMake GUI, the
@ -394,9 +394,9 @@ required. For details, please see this web site:
There is also a mirror here: There is also a mirror here:
http://www.vsoft-software.com/downloads.html http://www.vsoft-software.com/downloads.html
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course. course.
========================== ==========================
Last Updated: 28 September 2014 Last Updated: 28 September 2014

View File

@ -27,7 +27,7 @@
# README & NON-AUTOTOOLS-BUILD # README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt # These files are copied into the doc/html directory, with .txt
# extensions so that they can be hyperlinked from the HTML # extensions so that they can be hyperlinked from the HTML
# documentation, because some people just go to the HTML without # documentation, because some people just go to the HTML without
# looking for text files. # looking for text files.
@ -71,7 +71,7 @@ for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
# pcre2syntax \ # pcre2syntax \
# pcre2precompile pcre2perform pcre2posix pcre2sample \ # pcre2precompile pcre2perform pcre2posix pcre2sample \
# pcre2stack ; do # pcre2stack ; do
echo " Processing $file.3" echo " Processing $file.3"
nroff -c -man $file.3 >$file.rawtxt nroff -c -man $file.3 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >>pcre2.txt perl ../CleanTxt <$file.rawtxt >>pcre2.txt
@ -168,17 +168,13 @@ cd ..
echo Documentation done echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi if [ "$1" = "doc" ] ; then exit; fi
# FIXME pro tem only do docs
exit
# These files are detrailed; do not detrail the test data because there may be # These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF # significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The # line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing. # configure files are also omitted from the detrailing.
files="\ files="\
Makefile.am \ Makefile.am \
Makefile.in \
configure.ac \ configure.ac \
README \ README \
LICENCE \ LICENCE \
@ -195,54 +191,45 @@ files="\
RunGrepTest \ RunGrepTest \
RunTest \ RunTest \
pcre2-config.in \ pcre2-config.in \
libpcre.pc.in \
libpcre16.pc.in \
libpcre32.pc.in \
libpcreposix.pc.in \
libpcrecpp.pc.in \
config.h.in \
pcre2_chartables.c.dist \
pcre2demo.c \
pcre2grep.c \
pcre2test.c \
dftables.c \
pcre2posix.c \
pcre2posix.h \
pcre2.h.in \
pcre2_internal.h \
pcre2_byte_order.c \
pcre2_compile.c \
pcre2_config.c \
pcre2_dfa_exec.c \
pcre2_exec.c \
pcre2_fullinfo.c \
pcre2_get.c \
pcre2_globals.c \
pcre2_jit_compile.c \
pcre2_jit_test.c \
pcre2_maketables.c \
pcre2_newline.c \
pcre2_ord2utf8.c \
pcre16_ord2utf16.c \
pcre32_ord2utf32.c \
pcre2_printint.c \
pcre2_refcount.c \
pcre2_string_utils.c \
pcre2_study.c \
pcre2_tables.c \
pcre2_valid_utf8.c \
pcre2_version.c \
pcre2_xclass.c \
pcre16_utf16_utils.c \
pcre32_utf32_utils.c \
pcre16_valid_utf16.c \
pcre32_valid_utf32.c \
perltest.pl \ perltest.pl \
ucp.h \ libpcre2-8.pc.in \
makevp.bat \ libpcre2-16.pc.in \
pcre.def \ libpcre2-32.pc.in \
libpcre.def \ libpcre2-posix.pc.in \
libpcreposix.def" src/dftables.c \
src/pcre2.h.in \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c \
src/pcre2_jit_test.c \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_printint.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c \
src/pcre2demo.c \
src/pcre2grep.c \
src/pcre2posix.c \
src/pcre2posix.h \
src/pcre2test.c"
echo Detrailing echo Detrailing
perl ./Detrail $files doc/p* doc/html/* perl ./Detrail $files doc/p* doc/html/*

46
README
View File

@ -1,7 +1,7 @@
README file for PCRE2 (Perl-compatible regular expression library) README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------ ------------------------------------------------------------------
PCRE2 is a re-implementation of the original PCRE library with an entirely new PCRE2 is a re-implementation of the original PCRE library with an entirely new
API. The latest release of PCRE2 is always available in three alternative API. The latest release of PCRE2 is always available in three alternative
formats from: formats from:
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2 ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here: subscribe or manage your subscription here:
@ -41,7 +41,7 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. As this is a new API, 32-bit library, which processes strings of 32-bit values. As this is a new API,
there are as yet no C++ wrappers. there are as yet no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit The distribution does contain a set of C wrapper functions for the 8-bit
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for "make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems. many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file (command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake. NON-AUTOTOOLS-BUILD has information about CMake.
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
configure one library with UTF support and the other without in the same configure one library with UTF support and the other without in the same
configuration. configuration.
Even when --enable-unicode is included, the use of a UTF encoding still has Even when --enable-unicode is included, the use of a UTF encoding still has
to be enabled by an option at run time. When PCRE2 is compiled with this to be enabled by an option at run time. When PCRE2 is compiled with this
option, its input can only either be ASCII or UTF-8/16/32, even when running option, its input can only either be ASCII or UTF-8/16/32, even when running
on EBCDIC platforms. It is not possible to use both --enable-unicode and on EBCDIC platforms. It is not possible to use both --enable-unicode and
--enable-ebcdic at the same time. --enable-ebcdic at the same time.
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2 When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
includes support for the \P, \p, and \X sequences that recognize Unicode includes support for the \P, \p, and \X sequences that recognize Unicode
character properties. However, only the basic two-letter properties such as character properties. However, only the basic two-letter properties such as
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
cause programs to crash in strange ways. There is a discussion about stack cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page. sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around . In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure" 64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
. src/pcre2.h the public PCRE2 header file . src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS . pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure" that were set for "configure"
. libpcre2-8.pc ) . libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command . libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc ) . libpcre2-32.pc )
. libpcre2-posix.pc ) . libpcre2-posix.pc )
@ -452,7 +452,7 @@ prints the version number, and
outputs information about where the 8-bit library is installed. This command outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments. obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information The pkg-config command is another system for saving and retrieving information
@ -593,7 +593,7 @@ bug in PCRE2.
The third set of tests checks pcre2_maketables(), the facility for building a The third set of tests checks pcre2_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the set of character tables for a specific locale and using them instead of the
default tables. The script uses the "locale" command to check for the default tables. The script uses the "locale" command to check for the
availability of the "fr_FR", "french", or "fr" locale, and uses the first one availability of the "fr_FR", "french", or "fr" locale, and uses the first one
that it finds. If the "locale" command fails, or if its output doesn't include that it finds. If the "locale" command fails, or if its output doesn't include
"fr_FR", "french", or "fr" in the list of available locales, the third test "fr_FR", "french", or "fr" in the list of available locales, the third test
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
matches any one of them. matches any one of them.
The fourth and fifth tests check UTF and Unicode property support, the fourth The fourth and fifth tests check UTF and Unicode property support, the fourth
being compatible with the perltest.pl script, and the fifth checking being compatible with the perltest.pl script, and the fifth checking
PCRE2-specific things. PCRE2-specific things.
The sixth and seventh tests check the pcre2_dfa_match() alternative matching The sixth and seventh tests check the pcre2_dfa_match() alternative matching
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
generate different output in 8-bit mode. Each pair are for general cases and generate different output in 8-bit mode. Each pair are for general cases and
Unicode support, respectively. The thirteenth test checks the handling of Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
modes. modes.
The fourteenth test is run only when JIT support is not available, and the The fourteenth test is run only when JIT support is not available, and the
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
JIT-specific features such as information output from pcre2test about JIT JIT-specific features such as information output from pcre2test about JIT
compilation. compilation.
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support, POSIX interface to the 8-bit library, without and with Unicode support,
respectively. respectively.
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
File manifest File manifest
------------- -------------
The distribution should contain the files listed below. The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in (A) Source files for the PCRE2 library functions and their headers are found in
the src directory: the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c src/dftables.c auxiliary program for building pcre2_chartables.c
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
specified, used by copying to pcre2_chartables.c specified, used by copying to pcre2_chartables.c
src/pcre2posix.c ) src/pcre2posix.c )
src/pcre2_auto_possess.c ) src/pcre2_auto_possess.c )
src/pcre2_compile.c ) src/pcre2_compile.c )
src/pcre2_config.c ) src/pcre2_config.c )
src/pcre2_context.c ) src/pcre2_context.c )
src/pcre2_dfa_match.c ) src/pcre2_dfa_match.c )
src/pcre2_error.c ) src/pcre2_error.c )
src/pcre2_exec.c ) src/pcre2_exec.c )
src/pcre2_jit_compile.c ) src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library, src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c ) src/pcre2_maketables.c )
src/pcre2_match.c ) src/pcre2_match.c )
src/pcre2_match_data.c ) src/pcre2_match_data.c )
src/pcre2_newline.c ) src/pcre2_newline.c )
src/pcre2_ord2utf.c ) src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c ) src/pcre2_pattern_info.c )
src/pcre2_string_utils.c ) src/pcre2_string_utils.c )
src/pcre2_study.c ) src/pcre2_study.c )
src/pcre2_substring.c ) src/pcre2_substring.c )
src/pcre2_tables.c ) src/pcre2_tables.c )
src/pcre2_ucd.c ) src/pcre2_ucd.c )
src/pcre2_valid_utf.c ) src/pcre2_valid_utf.c )

View File

@ -23,7 +23,7 @@ pcre2grep=$builddir/pcre2grep
if [ ! -x $pcre2grep ] ; then if [ ! -x $pcre2grep ] ; then
echo "** $pcre2grep does not exist or is not execuatble." echo "** $pcre2grep does not exist or is not execuatble."
exit 1 exit 1
fi fi
valgrind= valgrind=
while [ $# -gt 0 ] ; do while [ $# -gt 0 ] ; do

View File

@ -126,7 +126,7 @@ fi
checkresult() checkresult()
{ {
if [ $1 -ne 0 ] ; then if [ $1 -ne 0 ] ; then
echo "** pcre2test failed - check testtry" echo "** pcre2test failed - check testtry"
exit 1 exit 1
fi fi

View File

@ -106,7 +106,7 @@ AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono" if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
then then
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]" echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
exit 1 exit 1
fi fi
# Handle --disable-pcre2-8 (enabled by default) # Handle --disable-pcre2-8 (enabled by default)
@ -512,7 +512,7 @@ if test "$enable_jit" = "yes"; then
CC="$PTHREAD_CC" CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS" CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS" LIBS="$PTHREAD_LIBS $LIBS"
fi fi
AC_DEFINE([SUPPORT_JIT], [], [ AC_DEFINE([SUPPORT_JIT], [], [
Define to any value to enable support for Just-In-Time compiling.]) Define to any value to enable support for Just-In-Time compiling.])
else else
@ -538,7 +538,7 @@ if test "$enable_stack_for_recursion" = "no"; then
matching. This can sometimes be a problem on systems that have matching. This can sometimes be a problem on systems that have
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
version that doesn't use recursion in the match() function; instead version that doesn't use recursion in the match() function; instead
it creates its own stack by steam using memory from the heap. For more it creates its own stack by steam using memory from the heap. For more
detail, see the comments and other stuff just above the match() function.]) detail, see the comments and other stuff just above the match() function.])
fi fi
@ -559,8 +559,8 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then
with_pcre2grep_bufsize="8192" with_pcre2grep_bufsize="8192"
else else
if test $? -gt 1 ; then if test $? -gt 1 ; then
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize]) AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
fi fi
fi fi
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [ AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
@ -579,9 +579,9 @@ elif test "$enable_pcre2test_libreadline" = "yes"; then
fi fi
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [ AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
The value of NEWLINE_DEFAULT determines the default newline character The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY), at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
and 5 (ANYCRLF).]) and 5 (ANYCRLF).])
if test "$enable_bsr_anycrlf" = "yes"; then if test "$enable_bsr_anycrlf" = "yes"; then

View File

@ -1,9 +1,9 @@
Building PCRE2 without using autotools Building PCRE2 without using autotools
-------------------------------------- --------------------------------------
This document has been converted from the PCRE1 document, but is not yet This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of various environments, as they applied only to PCRE1 and are probably out of
date. date.
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
environment. In particular, you can alter the definition of the NEWLINE environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line macro to specify what character(s) you want to be interpreted as line
terminators. terminators.
When you compile any of the PCRE2 modules, you must specify When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the -DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources. sources.
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
pcre2_chartables.c pcre2_chartables.c
pcre2_compile.c pcre2_compile.c
pcre2_config.c pcre2_config.c
pcre2_context.c pcre2_context.c
pcre2_dfa_match.c pcre2_dfa_match.c
pcre2_error.c pcre2_error.c
pcre2_jit_compile.c pcre2_jit_compile.c
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
pcre2_pattern_info.c pcre2_pattern_info.c
pcre2_string_utils.c pcre2_string_utils.c
pcre2_study.c pcre2_study.c
pcre2_substring.c pcre2_substring.c
pcre2_tables.c pcre2_tables.c
pcre2_ucd.c pcre2_ucd.c
pcre2_valid_utf.c pcre2_valid_utf.c
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or (6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling. -DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the (7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcre2posix.h file and then 8-bit library), ensure that you have the pcre2posix.h file and then
compile pcre2posix.c. Link the result (on its own) as the pcre2posix compile pcre2posix.c. Link the result (on its own) as the pcre2posix
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build spaces in the names for your CMake installation and your PCRE2 source and build
directories. directories.
The following instructions were contributed by a PCRE1 user, but they should The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the before attempting to repeat the CMake build process. In the CMake GUI, the
@ -394,9 +394,9 @@ required. For details, please see this web site:
There is also a mirror here: There is also a mirror here:
http://www.vsoft-software.com/downloads.html http://www.vsoft-software.com/downloads.html
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course. course.
========================== ==========================
Last Updated: 28 September 2014 Last Updated: 28 September 2014

View File

@ -1,7 +1,7 @@
README file for PCRE2 (Perl-compatible regular expression library) README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------ ------------------------------------------------------------------
PCRE2 is a re-implementation of the original PCRE library with an entirely new PCRE2 is a re-implementation of the original PCRE library with an entirely new
API. The latest release of PCRE2 is always available in three alternative API. The latest release of PCRE2 is always available in three alternative
formats from: formats from:
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2 ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here: subscribe or manage your subscription here:
@ -41,7 +41,7 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. As this is a new API, 32-bit library, which processes strings of 32-bit values. As this is a new API,
there are as yet no C++ wrappers. there are as yet no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit The distribution does contain a set of C wrapper functions for the 8-bit
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for "make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems. many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file (command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake. NON-AUTOTOOLS-BUILD has information about CMake.
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
configure one library with UTF support and the other without in the same configure one library with UTF support and the other without in the same
configuration. configuration.
Even when --enable-unicode is included, the use of a UTF encoding still has Even when --enable-unicode is included, the use of a UTF encoding still has
to be enabled by an option at run time. When PCRE2 is compiled with this to be enabled by an option at run time. When PCRE2 is compiled with this
option, its input can only either be ASCII or UTF-8/16/32, even when running option, its input can only either be ASCII or UTF-8/16/32, even when running
on EBCDIC platforms. It is not possible to use both --enable-unicode and on EBCDIC platforms. It is not possible to use both --enable-unicode and
--enable-ebcdic at the same time. --enable-ebcdic at the same time.
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2 When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
includes support for the \P, \p, and \X sequences that recognize Unicode includes support for the \P, \p, and \X sequences that recognize Unicode
character properties. However, only the basic two-letter properties such as character properties. However, only the basic two-letter properties such as
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
cause programs to crash in strange ways. There is a discussion about stack cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page. sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around . In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure" 64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
. src/pcre2.h the public PCRE2 header file . src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS . pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure" that were set for "configure"
. libpcre2-8.pc ) . libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command . libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc ) . libpcre2-32.pc )
. libpcre2-posix.pc ) . libpcre2-posix.pc )
@ -452,7 +452,7 @@ prints the version number, and
outputs information about where the 8-bit library is installed. This command outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments. obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information The pkg-config command is another system for saving and retrieving information
@ -593,7 +593,7 @@ bug in PCRE2.
The third set of tests checks pcre2_maketables(), the facility for building a The third set of tests checks pcre2_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the set of character tables for a specific locale and using them instead of the
default tables. The script uses the "locale" command to check for the default tables. The script uses the "locale" command to check for the
availability of the "fr_FR", "french", or "fr" locale, and uses the first one availability of the "fr_FR", "french", or "fr" locale, and uses the first one
that it finds. If the "locale" command fails, or if its output doesn't include that it finds. If the "locale" command fails, or if its output doesn't include
"fr_FR", "french", or "fr" in the list of available locales, the third test "fr_FR", "french", or "fr" in the list of available locales, the third test
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
matches any one of them. matches any one of them.
The fourth and fifth tests check UTF and Unicode property support, the fourth The fourth and fifth tests check UTF and Unicode property support, the fourth
being compatible with the perltest.pl script, and the fifth checking being compatible with the perltest.pl script, and the fifth checking
PCRE2-specific things. PCRE2-specific things.
The sixth and seventh tests check the pcre2_dfa_match() alternative matching The sixth and seventh tests check the pcre2_dfa_match() alternative matching
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
generate different output in 8-bit mode. Each pair are for general cases and generate different output in 8-bit mode. Each pair are for general cases and
Unicode support, respectively. The thirteenth test checks the handling of Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
modes. modes.
The fourteenth test is run only when JIT support is not available, and the The fourteenth test is run only when JIT support is not available, and the
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
JIT-specific features such as information output from pcre2test about JIT JIT-specific features such as information output from pcre2test about JIT
compilation. compilation.
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support, POSIX interface to the 8-bit library, without and with Unicode support,
respectively. respectively.
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
File manifest File manifest
------------- -------------
The distribution should contain the files listed below. The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in (A) Source files for the PCRE2 library functions and their headers are found in
the src directory: the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c src/dftables.c auxiliary program for building pcre2_chartables.c
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
specified, used by copying to pcre2_chartables.c specified, used by copying to pcre2_chartables.c
src/pcre2posix.c ) src/pcre2posix.c )
src/pcre2_auto_possess.c ) src/pcre2_auto_possess.c )
src/pcre2_compile.c ) src/pcre2_compile.c )
src/pcre2_config.c ) src/pcre2_config.c )
src/pcre2_context.c ) src/pcre2_context.c )
src/pcre2_dfa_match.c ) src/pcre2_dfa_match.c )
src/pcre2_error.c ) src/pcre2_error.c )
src/pcre2_exec.c ) src/pcre2_exec.c )
src/pcre2_jit_compile.c ) src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library, src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c ) src/pcre2_maketables.c )
src/pcre2_match.c ) src/pcre2_match.c )
src/pcre2_match_data.c ) src/pcre2_match_data.c )
src/pcre2_newline.c ) src/pcre2_newline.c )
src/pcre2_ord2utf.c ) src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c ) src/pcre2_pattern_info.c )
src/pcre2_string_utils.c ) src/pcre2_string_utils.c )
src/pcre2_study.c ) src/pcre2_study.c )
src/pcre2_substring.c ) src/pcre2_substring.c )
src/pcre2_tables.c ) src/pcre2_tables.c )
src/pcre2_ucd.c ) src/pcre2_ucd.c )
src/pcre2_valid_utf.c ) src/pcre2_valid_utf.c )

View File

@ -1,10 +1,10 @@
<html> <html>
<!-- This is a manually maintained file that is the root of the HTML version of <!-- This is a manually maintained file that is the root of the HTML version of
the PCRE2 documentation. When the HTML documents are built from the man the PCRE2 documentation. When the HTML documents are built from the man
page versions, the entire doc/html directory is emptied, this file is then page versions, the entire doc/html directory is emptied, this file is then
copied into doc/html/index.html, and the remaining files therein are copied into doc/html/index.html, and the remaining files therein are
created by the 132html script. created by the 132html script.
--> -->
<head> <head>
<title>PCRE2 specification</title> <title>PCRE2 specification</title>
</head> </head>
@ -87,7 +87,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
functions. functions.
</p> </p>
<table> <table>
<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td> <tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr> <td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
@ -153,7 +153,7 @@ functions.
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td> <tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr> <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td> <tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr> <td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>

View File

@ -43,11 +43,11 @@ of Unicode in use can be discovered by running
</PRE> </PRE>
</P> </P>
<P> <P>
The three libraries contain identical sets of functions, with names ending in The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However, _8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as one code unit width can be written using generic names such as
<b>pcre2_compile()</b>, and the documentation is written assuming that this is <b>pcre2_compile()</b>, and the documentation is written assuming that this is
the case. the case.
</P> </P>
<P> <P>

View File

@ -306,7 +306,7 @@ unknown should also use the real function names. (Unfortunately, it is not
possible in C code to save and restore the value of a macro.) possible in C code to save and restore the value of a macro.)
</P> </P>
<P> <P>
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
compiler error occurs. compiler error occurs.
</P> </P>
<P> <P>
@ -443,7 +443,7 @@ below.
</P> </P>
<P> <P>
The choice of newline convention does not affect the interpretation of The choice of newline convention does not affect the interpretation of
the \n or \r escape sequences, nor does it affect what \R matches, which has the \n or \r escape sequences, nor does it affect what \R matches, which has
its own separate control. its own separate control.
</P> </P>
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br> <br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
@ -553,7 +553,7 @@ The memory used for a general context should be freed by calling:
The compile context The compile context
</b><br> </b><br>
<P> <P>
A compile context is required if you want to change the default values of any A compile context is required if you want to change the default values of any
of the following compile-time parameters: of the following compile-time parameters:
<pre> <pre>
What \R matches (Unicode newlines or CR, LF, CRLF only); What \R matches (Unicode newlines or CR, LF, CRLF only);
@ -562,7 +562,7 @@ of the following compile-time parameters:
The compile time nested parentheses limit; The compile time nested parentheses limit;
An external function for stack checking. An external function for stack checking.
</pre> </pre>
A compile context is also required if you are using custom memory management. A compile context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of If none of these apply, just pass NULL as the context argument of
<i>pcre2_compile()</i>. <i>pcre2_compile()</i>.
</P> </P>
@ -579,33 +579,33 @@ A compile context is created, copied, and freed by the following functions:
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b> <b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
<br> <br>
<br> <br>
A compile context is created with default values for its parameters. These can A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected. PCRE2_ERROR_BADDATA if invalid data is detected.
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b> <b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b> <b> uint32_t <i>value</i>);</b>
<br> <br>
<br> <br>
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF, The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled; ending sequence. The value of this parameter does not affect what is compiled;
it is just saved with the compiled pattern. The value is used by the JIT it is just saved with the compiled pattern. The value is used by the JIT
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
<i>pcre2_dfa_match()</i>. <i>pcre2_dfa_match()</i>.
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b> <b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
<b> const unsigned char *<i>tables</i>);</b> <b> const unsigned char *<i>tables</i>);</b>
<br> <br>
<br> <br>
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
argument is a general context. This function builds a set of character tables argument is a general context. This function builds a set of character tables
in the current locale. in the current locale.
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b> <b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b> <b> uint32_t <i>value</i>);</b>
<br> <br>
<br> <br>
This specifies which characters or character sequences are to be recognized as This specifies which characters or character sequences are to be recognized as
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
PCRE2_NEWLINE_ANY (any Unicode newline sequence). PCRE2_NEWLINE_ANY (any Unicode newline sequence).
</P> </P>
@ -627,7 +627,7 @@ using up too much system stack when being compiled.
<br> <br>
<br> <br>
There is at least one application that runs PCRE2 in threads with very limited There is at least one application that runs PCRE2 in threads with very limited
system stack, where running out of stack is to be avoided at all costs. The system stack, where running out of stack is to be avoided at all costs. The
parenthesis limit above cannot take account of how much stack is actually parenthesis limit above cannot take account of how much stack is actually
available. For a finer control, you can supply a function that is called available. For a finer control, you can supply a function that is called
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
@ -638,20 +638,20 @@ function should return zero if all is well, or non-zero to force an error.
The match context The match context
</b><br> </b><br>
<P> <P>
A match context is required if you want to change the default values of any A match context is required if you want to change the default values of any
of the following match-time parameters: of the following match-time parameters:
<pre> <pre>
What \R matches (Unicode newlines or CR, LF, CRLF only); What \R matches (Unicode newlines or CR, LF, CRLF only);
A callout function; A callout function;
The limit for calling <i>match()</i>; The limit for calling <i>match()</i>;
The limit for calling <i>match()</i> recursively; The limit for calling <i>match()</i> recursively;
The newline character sequence; The newline character sequence;
</pre> </pre>
A match context is also required if you are using custom memory management. A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of If none of these apply, just pass NULL as the context argument of
<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>. <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
Changing the newline value or what \R matches at match time disables the use Changing the newline value or what \R matches at match time disables the use
of JIT via <b>pcre2_match()</b>. of JIT via <b>pcre2_match()</b>.
</P> </P>
<P> <P>
A match context is created, copied, and freed by the following functions: A match context is created, copied, and freed by the following functions:
@ -666,8 +666,8 @@ A match context is created, copied, and freed by the following functions:
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b> <b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
<br> <br>
<br> <br>
A match context is created with default values for its parameters. These can A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected. PCRE2_ERROR_BADDATA if invalid data is detected.
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b> <b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b> <b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
@ -693,7 +693,7 @@ calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
imposed on the number of times this function is called during a match, which imposed on the number of times this function is called during a match, which
has the effect of limiting the amount of backtracking that can take place. For has the effect of limiting the amount of backtracking that can take place. For
patterns that are not anchored, the count restarts from zero for each position patterns that are not anchored, the count restarts from zero for each position
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>, in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
which ignores it. which ignores it.
</P> </P>
<P> <P>
@ -730,7 +730,7 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
Limiting the recursion depth limits the amount of system stack that can be Limiting the recursion depth limits the amount of system stack that can be
used, or, when PCRE2 has been compiled to use memory on the heap instead of the used, or, when PCRE2 has been compiled to use memory on the heap instead of the
stack, the amount of heap memory that can be used. This limit is not relevant, stack, the amount of heap memory that can be used. This limit is not relevant,
and is ignored, when matching is done using JIT compiled code or by the and is ignored, when matching is done using JIT compiled code or by the
<b>pcre2_dfa_match()</b> function. <b>pcre2_dfa_match()</b> function.
</P> </P>
<P> <P>
@ -751,9 +751,9 @@ limit is set, less than the default.
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b> <b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
<br> <br>
<br> <br>
This function sets up two additional custom memory management functions for use This function sets up two additional custom memory management functions for use
by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
backtracking data, instead of recursive function calls that use the system backtracking data, instead of recursive function calls that use the system
stack. There is a discussion about PCRE2's stack usage in the stack. There is a discussion about PCRE2's stack usage in the
<a href="pcre2stack.html"><b>pcre2stack</b></a> <a href="pcre2stack.html"><b>pcre2stack</b></a>
documentation. See the documentation. See the
@ -765,7 +765,7 @@ limited stacks. Because of the greater use of memory management,
general custom memory functions are provided so that special-purpose external general custom memory functions are provided so that special-purpose external
code can be used for this case, because the memory blocks are all the same code can be used for this case, because the memory blocks are all the same
size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
so that they can be re-used when possible during the match. In the absence of so that they can be re-used when possible during the match. In the absence of
these functions, the normal custom memory management functions are used, if these functions, the normal custom memory management functions are used, if
supplied, otherwise the system functions. supplied, otherwise the system functions.
</P> </P>
@ -785,7 +785,7 @@ required. The second argument is a pointer to memory into which the information
is placed. If NULL is passed, the function returns the amount of memory that is is placed. If NULL is passed, the function returns the amount of memory that is
needed for the requested information. For calls that return numerical values, needed for the requested information. For calls that return numerical values,
the value is in bytes; when requesting these values, <i>where</i> should point the value is in bytes; when requesting these values, <i>where</i> should point
to appropriately aligned memory. For calls that return strings, the required to appropriately aligned memory. For calls that return strings, the required
length is given in code units, not counting the terminating zero. length is given in code units, not counting the terminating zero.
</P> </P>
<P> <P>
@ -809,7 +809,7 @@ compiling is available; otherwise it is set to zero.
PCRE2_CONFIG_JITTARGET PCRE2_CONFIG_JITTARGET
</pre> </pre>
The <i>where</i> argument should point to a buffer that is at least 48 code The <i>where</i> argument should point to a buffer that is at least 48 code
units long. (The exact length needed can be found by calling units long. (The exact length needed can be found by calling
<b>pcre2_config()</b> with <i>where</i> set to NULL.) The buffer is filled with a <b>pcre2_config()</b> with <i>where</i> set to NULL.) The buffer is filled with a
string that contains the name of the architecture for which the JIT compiler is string that contains the name of the architecture for which the JIT compiler is
configured, for example "x86 32bit (little endian + unaligned)". If JIT support configured, for example "x86 32bit (little endian + unaligned)". If JIT support
@ -820,9 +820,9 @@ the string, in code units, is returned.
</pre> </pre>
The output is an integer that contains the number of bytes used for internal The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. When PCRE2 is configured, the value linkage in compiled regular expressions. When PCRE2 is configured, the value
can be set to 2, 3, or 4, with the default being 2. This is the value that is can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled, returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled, a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
internal linkages always use 4 bytes, so the configured value is not relevant. internal linkages always use 4 bytes, so the configured value is not relevant.
</P> </P>
<P> <P>
@ -908,16 +908,16 @@ units) is returned.
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b> <b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
</P> </P>
<P> <P>
This function compiles a pattern, defined by a pointer to a string of code This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated, units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
it is no longer needed. it is no longer needed.
</P> </P>
<P> <P>
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
function that was used for the compile context. function that was used for the compile context.
</P> </P>
<P> <P>
@ -927,7 +927,7 @@ options are described below. Some of them (in particular, those that are
compatible with Perl, but some others as well) can also be set and unset from compatible with Perl, but some others as well) can also be set and unset from
within the pattern (see the detailed description in the within the pattern (see the detailed description in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a> <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation). documentation).
</P> </P>
<P> <P>
For those options that can be different in different parts of the pattern, the For those options that can be different in different parts of the pattern, the
@ -936,7 +936,7 @@ compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
the time of matching as well as at compile time. the time of matching as well as at compile time.
</P> </P>
<P> <P>
Other, less frequently required compile-time parameters (for example, the Other, less frequently required compile-time parameters (for example, the
newline setting) can be provided in a compile context (as described newline setting) can be provided in a compile context (as described
<a href="#compilecontext">above).</a> <a href="#compilecontext">above).</a>
</P> </P>
@ -962,10 +962,10 @@ This code fragment shows a typical straightforward call to
<pre> <pre>
pcre2_code *re; pcre2_code *re;
PCRE2_SIZE erroffset; PCRE2_SIZE erroffset;
int errorcode; int errorcode;
re = pcre2_compile( re = pcre2_compile(
"^A.*Z", /* the pattern */ "^A.*Z", /* the pattern */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
0, /* default options */ 0, /* default options */
&errorcode, /* for error code */ &errorcode, /* for error code */
&erroffset, /* for error offset */ &erroffset, /* for error offset */
@ -984,14 +984,14 @@ Perl.
<pre> <pre>
PCRE2_ALLOW_EMPTY_CLASS PCRE2_ALLOW_EMPTY_CLASS
</pre> </pre>
By default, for compatibility with Perl, a closing square bracket that By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match. therefore contains no characters and so can never match.
<pre> <pre>
PCRE2_ALT_BSUX PCRE2_ALT_BSUX
</pre> </pre>
This option requests alternative handling of three escape sequences, which This option requests alternative handling of three escape sequences, which
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
</P> </P>
<P> <P>
@ -1023,7 +1023,7 @@ documentation.
</pre> </pre>
If this bit is set, letters in the pattern match both upper and lower case If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting. changed within a pattern by a (?i) option setting.
<pre> <pre>
PCRE2_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY
</pre> </pre>
@ -1076,7 +1076,7 @@ Which characters are interpreted as newlines can be specified by a setting in
the compile context that is passed to <b>pcre2_compile()</b> or by a special the compile context that is passed to <b>pcre2_compile()</b> or by a special
sequence at the start of the pattern, as described in the section entitled sequence at the start of the pattern, as described in the section entitled
<a href="pcre2pattern.html#newlines">"Newline conventions"</a> <a href="pcre2pattern.html#newlines">"Newline conventions"</a>
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
built. built.
<pre> <pre>
PCRE2_FIRSTLINE PCRE2_FIRSTLINE
@ -1091,7 +1091,7 @@ If this option is set, a back reference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail). empty string (by default this causes the current matching alternative to fail).
A pattern such as (\1)(a) succeeds when this option is set (assuming it can A pattern such as (\1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl find an "a" in the subject), whereas it fails by default, for Perl
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
JavaScript). JavaScript).
<pre> <pre>
PCRE2_MULTILINE PCRE2_MULTILINE
@ -1116,10 +1116,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
PCRE2_NEVER_UCP PCRE2_NEVER_UCP
</pre> </pre>
This option locks out the use of Unicode properties for handling \B, \b, \D, This option locks out the use of Unicode properties for handling \B, \b, \D,
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described \d, \S, \s, \W, \w, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The may be useful in applications that process patterns from external sources. The
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
<pre> <pre>
PCRE2_NEVER_UTF PCRE2_NEVER_UTF
@ -1195,7 +1195,7 @@ pattern
(*MARK:A)(X|Y) (*MARK:A)(X|Y)
</pre> </pre>
The minimum length for a match is one character. If the subject is "ABC", there The minimum length for a match is one character. If the subject is "ABC", there
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
string at the end of the subject does not take place, because PCRE2 knows that string at the end of the subject does not take place, because PCRE2 knows that
the subject is now too short, and so the (*MARK) is never encountered. In this the subject is now too short, and so the (*MARK) is never encountered. In this
case, the optimization does not affect the overall match result, which is still case, the optimization does not affect the overall match result, which is still
@ -1211,7 +1211,7 @@ and
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a> <a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
in the in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a> <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
document. document.
If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
error code. error code.
</P> </P>
@ -1391,9 +1391,9 @@ The possible values for the second argument are defined in <b>pcre2.h</b>, and
are as follows: are as follows:
<pre> <pre>
PCRE2_INFO_ALLOPTIONS PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS PCRE2_INFO_ARGOPTIONS
</pre> </pre>
Return a copy of the pattern's options. The third argument should point to a Return a copy of the pattern's options. The third argument should point to a
<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that <b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level option settings at the start the compile options as modified by any top-level option settings at the start
@ -1411,7 +1411,7 @@ alternatives begin with one of the following:
\G always \G always
.* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears .* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
</pre> </pre>
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
PCRE2_INFO_ALLOPTIONS. PCRE2_INFO_ALLOPTIONS.
<pre> <pre>
PCRE2_INFO_BACKREFMAX PCRE2_INFO_BACKREFMAX
@ -1499,7 +1499,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
</pre> </pre>
Returns 1 if there is a rightmost literal code unit that must exist in any Returns 1 if there is a rightmost literal code unit that must exist in any
matched string, other than at its start. The third argument should point to an matched string, other than at its start. The third argument should point to an
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is <b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
returned, the code unit value itself can be retrieved using returned, the code unit value itself can be retrieved using
PCRE2_INFO_LASTCODEUNIT. PCRE2_INFO_LASTCODEUNIT.
</P> </P>
@ -1657,11 +1657,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b> <b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
</P> </P>
<P> <P>
Information about successful and unsuccessful matches is placed in a match Information about successful and unsuccessful matches is placed in a match
data block, which is an opaque structure that is accessed by function calls. In data block, which is an opaque structure that is accessed by function calls. In
particular, the match data block contains a vector of offsets into the subject particular, the match data block contains a vector of offsets into the subject
string that define the matched part of the subject and any substrings that were string that define the matched part of the subject and any substrings that were
captured. This is known as the <i>ovector</i>. captured. This is known as the <i>ovector</i>.
</P> </P>
<P> <P>
Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
@ -1676,12 +1676,12 @@ return the overall matched string.
</P> </P>
<P> <P>
For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
pointer to a compiled pattern. In this case the ovector is created to be pointer to a compiled pattern. In this case the ovector is created to be
exactly the right size to hold all the substrings a pattern might capture. exactly the right size to hold all the substrings a pattern might capture.
</P> </P>
<P> <P>
The second argument of both these functions is a pointer to a general context, The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the which can specify custom memory management for obtaining the memory for the
match data block. If you are not using custom memory management, pass NULL. match data block. If you are not using custom memory management, pass NULL.
</P> </P>
<P> <P>
@ -1728,8 +1728,8 @@ Here is an example of a simple call to <b>pcre2_match()</b>:
match_data, /* the match data block */ match_data, /* the match data block */
NULL); /* a match context; NULL means use defaults */ NULL); /* a match context; NULL means use defaults */
</pre> </pre>
If the subject string is zero-terminated, the length can be given as If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
matching parameters are to be changed. For details, see the section on matching parameters are to be changed. For details, see the section on
<a href="#matchcontext">the match context</a> <a href="#matchcontext">the match context</a>
above. above.
@ -1742,7 +1742,7 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
<i>subject</i>, a length in <i>length</i>, and a starting offset in <i>subject</i>, a length in <i>length</i>, and a starting offset in
<i>startoffset</i>. The length and offset are in code units, not characters. <i>startoffset</i>. The length and offset are in code units, not characters.
That is, they are in bytes for the 8-bit library, 16-bit code units for the That is, they are in bytes for the 8-bit library, 16-bit code units for the
16-bit library, and 32-bit code units for the 32-bit library, whether or not 16-bit library, and 32-bit code units for the 32-bit library, whether or not
UTF processing is enabled. UTF processing is enabled.
</P> </P>
<P> <P>
@ -1752,7 +1752,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32 must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the mode, one code unit equals one character, so all offsets are valid). Like the
pattern string, the subject may contain binary zeroes. pattern string, the subject may contain binary zeroes.
</P> </P>
<P> <P>
A non-zero starting offset is useful when searching for another match in the A non-zero starting offset is useful when searching for another match in the
@ -1814,7 +1814,7 @@ JIT matching is disabled and the normal interpretive code in
The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT matching time. Note that setting the option at match time disables JIT
matching. matching.
<pre> <pre>
PCRE2_NOTBOL PCRE2_NOTBOL
@ -1867,14 +1867,14 @@ and
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a> <a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
in the in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a> <a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page. page.
</P> </P>
<P> <P>
If you know that your subject is valid, and you want to skip these checks for If you know that your subject is valid, and you want to skip these checks for
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
<b>pcre2_match()</b>. You might want to do this for the second and subsequent <b>pcre2_match()</b>. You might want to do this for the second and subsequent
calls to <b>pcre2_match()</b> if you are making repeated calls to find all the calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
matches in a single subject string. matches in a single subject string.
</P> </P>
<P> <P>
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
@ -1908,9 +1908,9 @@ documentation.
</P> </P>
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br> <br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
<P> <P>
When PCRE2 is built, a default newline convention is set; this is usually the When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in standard convention for the operating system. The default can be overridden in
either a either a
<a href="#compilecontext">compile context</a> <a href="#compilecontext">compile context</a>
or a or a
<a href="#matchcontext">match context.</a> <a href="#matchcontext">match context.</a>
@ -1953,7 +1953,7 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.
</P> </P>
<P> <P>
In general, a pattern matches a certain portion of the subject, and in In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
book, this is called "capturing" in what follows, and the phrase "capturing book, this is called "capturing" in what follows, and the phrase "capturing
subpattern" is used for a fragment of a pattern that picks out a substring. subpattern" is used for a fragment of a pattern that picks out a substring.
@ -1964,11 +1964,11 @@ pattern.
</P> </P>
<P> <P>
The overall matched string and any captured substrings are returned to the The overall matched string and any captured substrings are returned to the
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
contained within the contained within the
<a href="#matchdatablock">match data block.</a> <a href="#matchdatablock">match data block.</a>
You can obtain direct access to the ovector by calling You can obtain direct access to the ovector by calling
<b>pcre2_get_ovector_pointer()</b> to find its address, and <b>pcre2_get_ovector_pointer()</b> to find its address, and
<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it <b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
contains. Alternatively, you can use the auxiliary functions for accessing contains. Alternatively, you can use the auxiliary functions for accessing
captured substrings captured substrings
@ -2044,26 +2044,26 @@ Other information about the match
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b> <b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
</P> </P>
<P> <P>
In addition to the offsets in the ovector, other information about a match is In addition to the offsets in the ovector, other information about a match is
retained in the match data block and can be retrieved by the above functions. retained in the match data block and can be retrieved by the above functions.
</P> </P>
<P> <P>
When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
pointer to the zero-terminated name, which is within the compiled pattern. pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed Otherwise NULL is returned. A (*MARK) name may be available after a failed
match or a partial match, as well as after a successful one. match or a partial match, as well as after a successful one.
</P> </P>
<P> <P>
The offset of the character at which the successful match started is The offset of the character at which the successful match started is
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note, <i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
however, that \K has no effect for a partial match. however, that \K has no effect for a partial match.
<a name="errorlist"></a></P> <a name="errorlist"></a></P>
<br><b> <br><b>
Error return values from <b>pcre2_match()</b> Error return values from <b>pcre2_match()</b>
</b><br> </b><br>
<P> <P>
If <b>pcre2_match()</b> fails, it returns a negative number. This can be If <b>pcre2_match()</b> fails, it returns a negative number. This can be
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
error codes are also returned by other functions, and are documented with them. error codes are also returned by other functions, and are documented with them.
The codes are given names in the header file. If UTF checking is in force and The codes are given names in the header file. If UTF checking is in force and
@ -2205,7 +2205,7 @@ argument is a pointer to the match data block, the second is the group number,
and the third is a pointer to a variable into which the length is placed. and the third is a pointer to a variable into which the length is placed.
</P> </P>
<P> <P>
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
new memory, obtained using the same memory allocation function that was used new memory, obtained using the same memory allocation function that was used
for the match data block. The first two arguments of these functions are a for the match data block. The first two arguments of these functions are a
@ -2220,10 +2220,10 @@ This is updated to contain the actual number of code units used, excluding the
terminating zero. terminating zero.
</P> </P>
<P> <P>
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by zero. When the substring is no longer needed, the memory should be freed by
calling <b>pcre2_substring_free()</b>. calling <b>pcre2_substring_free()</b>.
</P> </P>
<P> <P>
@ -2237,9 +2237,9 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
<pre> <pre>
PCRE2_ERROR_NOSUBSTRING PCRE2_ERROR_NOSUBSTRING
</pre> </pre>
No substring with the given number was captured. This could be because there is No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too that number did not participate in the match, or because the ovector was too
small to capture that group. small to capture that group.
</P> </P>
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br> <br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
@ -2253,7 +2253,7 @@ small to capture that group.
<P> <P>
The <b>pcre2_substring_list_get()</b> function extracts all available substrings The <b>pcre2_substring_list_get()</b> function extracts all available substrings
and builds a list of pointers to them, and a second list that contains their and builds a list of pointers to them, and a second list that contains their
lengths (in code units), excluding a terminating zero that is added to each of lengths (in code units), excluding a terminating zero that is added to each of
them. All this is done in a single block of memory that is obtained using the them. All this is done in a single block of memory that is obtained using the
same memory allocation function that was used to get the match data block. same memory allocation function that was used to get the match data block.
</P> </P>
@ -2265,7 +2265,7 @@ NULL pointer. The address of the list of lengths is returned via
therefore need the lengths, you may supply NULL as the <b>lengthsptr</b> therefore need the lengths, you may supply NULL as the <b>lengthsptr</b>
argument to disable the creation of a list of lengths. The yield of the argument to disable the creation of a list of lengths. The yield of the
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
could not be obtained. When the list is no longer needed, it should be freed by could not be obtained. When the list is no longer needed, it should be freed by
calling <b>pcre2_substring_list_free()</b>. calling <b>pcre2_substring_list_free()</b>.
</P> </P>
<P> <P>
@ -2312,7 +2312,7 @@ name.
<P> <P>
Given the number, you can extract the substring directly, or use one of the Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also functions described in the previous section. For convenience, there are also
"byname" functions that correspond to the "bynumber" functions, the only "byname" functions that correspond to the "bynumber" functions, the only
difference being that the second argument is a name instead of a number. difference being that the second argument is a name instead of a number.
However, if PCRE2_DUPNAMES is set and there are duplicate names, However, if PCRE2_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section). the behaviour may not be what you want (see the next section).
@ -2375,7 +2375,7 @@ numbers, and hence the captured data.
<P> <P>
The traditional matching function uses a similar algorithm to Perl, which stops The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match at a given want to find all possible matches, or the longest possible match at a given
position, consider using the alternative matching function (see below) instead. position, consider using the alternative matching function (see below) instead.
If you cannot use the alternative function, you can kludge it up by making use If you cannot use the alternative function, you can kludge it up by making use
of the callout facility, which is described in the of the callout facility, which is described in the
@ -2566,8 +2566,8 @@ fail, this error is given.
</P> </P>
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br> <br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
<P> <P>
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3), <b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3), <b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3). <b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
</P> </P>
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br> <br><a name="SEC31" href="#TOC1">AUTHOR</a><br>

View File

@ -88,11 +88,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
strings that are contained in vectors of 16-bit and 32-bit code units, strings that are contained in vectors of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters or respectively. These can be interpreted either as single-unit characters or
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
the following to the <b>configure</b> command: the following to the <b>configure</b> command:
<pre> <pre>
--enable-pcre16 --enable-pcre16
--enable-pcre32 --enable-pcre32
</pre> </pre>
If you do not want the 8-bit library, add If you do not want the 8-bit library, add
<pre> <pre>
@ -358,7 +358,7 @@ override this value by specifying a run-time option.
If you add one of If you add one of
<pre> <pre>
--enable-pcre2test-libreadline --enable-pcre2test-libreadline
--enable-pcre2test-libedit --enable-pcre2test-libedit
</pre> </pre>
to the <b>configure</b> command, <b>pcre2test</b> is linked with the to the <b>configure</b> command, <b>pcre2test</b> is linked with the
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is <b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
@ -376,8 +376,8 @@ unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for <b>libreadline</b> says configuration may be necessary. The INSTALL file for <b>libreadline</b> says
this: this:
<pre> <pre>
"Readline uses the termcap functions, but does not link with "Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library." which link with readline the to choose an appropriate library."
</pre> </pre>
If your environment has not been set up so that an appropriate library is If your environment has not been set up so that an appropriate library is

View File

@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API. incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library. width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system In Unix-like environments, if PCRE2 is installed in your standard system
@ -56,8 +56,8 @@ the following line. */
/* #define PCRE2_STATIC */ /* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses /* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */ such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8 #define PCRE2_CODE_UNIT_WIDTH 8
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile( re = pcre2_compile(
pattern, /* the pattern */ pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */ 0, /* default options */
&amp;errornumber, /* for error number */ &amp;errornumber, /* for error number */
&amp;erroroffset, /* for error offset */ &amp;erroroffset, /* for error offset */
@ -151,9 +151,9 @@ re = pcre2_compile(
if (re == NULL) if (re == NULL)
{ {
PCRE2_UCHAR buffer[256]; PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer); buffer);
return 1; return 1;
} }
@ -197,7 +197,7 @@ if (rc &lt; 0)
return 1; return 1;
} }
/* Match succeeded. Get a pointer to the output vector, where string offsets are /* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */ stored. */
ovector = pcre2_get_ovector_pointer(match_data); ovector = pcre2_get_ovector_pointer(match_data);
@ -210,7 +210,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
* captured. * * captured. *
*************************************************************************/ *************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used /* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */ pcre2_match_data_create_from_pattern() above. */
if (rc == 0) if (rc == 0)
@ -261,7 +261,7 @@ if (namecount &lt;= 0) printf("No named substrings\n"); else
&amp;name_entry_size); /* where to put the answer */ &amp;name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name, /* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */ bytes, most significant first. */
tabptr = name_table; tabptr = name_table;
@ -306,7 +306,7 @@ if (namecount &lt;= 0) printf("No named substrings\n"); else
if (!find_all) /* Check for -g */ if (!find_all) /* Check for -g */
{ {
pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */ pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */ return 0; /* Exit the program. */
} }
@ -324,7 +324,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline); (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY || crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF || newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF; newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */ /* Loop for second and subsequent matches */

View File

@ -71,10 +71,10 @@ performance, there is also a "fast path" API that is JIT-specific.
</P> </P>
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br> <br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
<P> <P>
To make use of the JIT support in the simplest way, all you have to do is to To make use of the JIT support in the simplest way, all you have to do is to
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
<b>pcre2_compile()</b>. This function has two arguments: the first is the <b>pcre2_compile()</b>. This function has two arguments: the first is the
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
second is a set of option bits, which must include at least one of second is a set of option bits, which must include at least one of
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
</P> </P>
@ -239,7 +239,7 @@ non-default JIT stacks might operate:
</pre> </pre>
All the functions described in this section do nothing if JIT is not available, All the functions described in this section do nothing if JIT is not available,
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
processed by <b>pcre2_jit_compile()</b>. processed by <b>pcre2_jit_compile()</b>.
<a name="stackfaq"></a></P> <a name="stackfaq"></a></P>
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br> <br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
@ -328,18 +328,18 @@ callback.
<pre> <pre>
int rc; int rc;
pcre2_code *re; pcre2_code *re;
pcre2_match_data *match_data; pcre2_match_data *match_data;
pcre2_jit_stack *jit_stack; pcre2_jit_stack *jit_stack;
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
&amp;errornumber, &amp;erroffset, NULL); &amp;errornumber, &amp;erroffset, NULL);
/* Check for errors */ /* Check for errors */
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
/* Check for errors */ /* Check for errors */
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024); jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
/* Check for error (NULL) */ /* Check for error (NULL) */
pcre2_jit_stack_assign(re, NULL, jit_stack); pcre2_jit_stack_assign(re, NULL, jit_stack);
match_data = pcre2_match_data_create(re, 10); match_data = pcre2_match_data_create(re, 10);
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL); rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
/* Check results */ /* Check results */
pcre2_code_free(re); pcre2_code_free(re);

View File

@ -89,15 +89,15 @@ empty string at the end of the subject.
</P> </P>
<P> <P>
When a partial match is returned, the first two elements in the ovector point When a partial match is returned, the first two elements in the ovector point
to the portion of the subject that was matched. The appearance of \K in the to the portion of the subject that was matched. The appearance of \K in the
pattern has no effect for a partial match. Consider this pattern: pattern has no effect for a partial match. Consider this pattern:
<pre> <pre>
/abc\K123/ /abc\K123/
</pre> </pre>
If it is matched against "456abc123xyz" the result is a complete match, and the If it is matched against "456abc123xyz" the result is a complete match, and the
ovector defines the matched string as "123", because \K resets the "start of ovector defines the matched string as "123", because \K resets the "start of
match" point. However, if a partial match is requested and the subject string match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all is "456abc12", a partial match is found for the string "abc12", because all
these characters are needed for a subsequent re-match with additional these characters are needed for a subsequent re-match with additional
characters. characters.
</P> </P>
@ -343,14 +343,14 @@ same point as before.
For example, if the pattern "(?&#60;=123)abc" is partially matched against the For example, if the pattern "(?&#60;=123)abc" is partially matched against the
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
lookbehind count is 3, so all characters before offset 2 can be discarded. The lookbehind count is 3, so all characters before offset 2 can be discarded. The
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b> value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
displays a partial match, it indicates the lookbehind characters with '&#60;' displays a partial match, it indicates the lookbehind characters with '&#60;'
characters: characters:
<pre> <pre>
re&#62; "(?&#60;=123)abc" re&#62; "(?&#60;=123)abc"
data&#62; xx123ab\=ph data&#62; xx123ab\=ph
Partial match: 123ab Partial match: 123ab
&#60;&#60;&#60; &#60;&#60;&#60;
</PRE> </PRE>
</P> </P>
<P> <P>

View File

@ -145,7 +145,7 @@ Unicode newline sequence. The
<a href="pcre2api.html"><b>pcre2api</b></a> <a href="pcre2api.html"><b>pcre2api</b></a>
page has page has
<a href="pcre2api.html#newlines">further discussion</a> <a href="pcre2api.html#newlines">further discussion</a>
about newlines, and shows how to set the newline convention when calling about newlines, and shows how to set the newline convention when calling
<b>pcre2_compile()</b>. <b>pcre2_compile()</b>.
</P> </P>
<P> <P>
@ -218,7 +218,7 @@ corresponding characters in the subject. As a trivial example, the pattern
</pre> </pre>
matches a portion of a subject string that is identical to itself. When matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE2_CASELESS option), letters are matched caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case. independently of case.
</P> </P>
<P> <P>
The power of regular expressions comes from the ability to include alternatives The power of regular expressions comes from the ability to include alternatives
@ -1191,8 +1191,8 @@ An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default. square bracket. A closing square bracket on its own is not special by default.
If a closing square bracket is required as a member of the class, it should be If a closing square bracket is required as a member of the class, it should be
the first data character in the class (after an initial circumflex, if present) the first data character in the class (after an initial circumflex, if present)
or escaped with a backslash. This means that, by default, an empty class cannot or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
square bracket at the start does end the (empty) class. square bracket at the start does end the (empty) class.
</P> </P>
<P> <P>
@ -1216,7 +1216,7 @@ string.
When caseless matching is set, any letters in a class represent both their When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would. caseful version would.
</P> </P>
<P> <P>
Characters that might indicate line breaks are never treated in any special way Characters that might indicate line breaks are never treated in any special way
@ -1341,7 +1341,7 @@ classes by other sequences, as follows:
[:alnum:] becomes \p{Xan} [:alnum:] becomes \p{Xan}
[:alpha:] becomes \p{L} [:alpha:] becomes \p{L}
[:blank:] becomes \h [:blank:] becomes \h
[:cntrl:] becomes \p{Cc} [:cntrl:] becomes \p{Cc}
[:digit:] becomes \p{Nd} [:digit:] becomes \p{Nd}
[:lower:] becomes \p{Ll} [:lower:] becomes \p{Ll}
[:space:] becomes \p{Xps} [:space:] becomes \p{Xps}
@ -1490,7 +1490,7 @@ match "cataract", "erpillar" or an empty string.
<br> <br>
2. It sets up the subpattern as a capturing subpattern. This means that, when 2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, the portion of the subject string that matched the the whole pattern matches, the portion of the subject string that matched the
subpattern is passed back to the caller, separately from the portion that subpattern is passed back to the caller, separately from the portion that
matched the whole pattern. (This applies only to the traditional matching matched the whole pattern. (This applies only to the traditional matching
function; the DFA matching function does not support capturing.) function; the DFA matching function does not support capturing.)
</P> </P>
@ -1908,7 +1908,7 @@ at release 5.10.
PCRE2 has an optimization that automatically "possessifies" certain simple PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow. there is no point in backtracking into a sequence of A's when B must follow.
This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS). the pattern with (*NO_AUTO_POSSESS).
</P> </P>
<P> <P>
@ -2216,7 +2216,7 @@ if the pattern is written as
<pre> <pre>
^.*+(?&#60;=abcd) ^.*+(?&#60;=abcd)
</pre> </pre>
there can be no backtracking for the .*+ item because of the possessive there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbehind quantifier; it can match only the entire string. The subsequent lookbehind
assertion does a single test on the last four characters. If it fails, the assertion does a single test on the last four characters. If it fails, the
match fails immediately. For long strings, this approach makes a significant match fails immediately. For long strings, this approach makes a significant
@ -2720,8 +2720,8 @@ same pair of parentheses when there is a repetition.
<P> <P>
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE2 provides an external code. The feature is called "callout". The caller of PCRE2 provides an external
function by putting its entry point in a match context using the function function by putting its entry point in a match context using the function
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or <b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry <b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
point is set to NULL, callouts are disabled. point is set to NULL, callouts are disabled.
</P> </P>
@ -2961,7 +2961,7 @@ output from <b>pcre2test</b>:
re&#62; /(*COMMIT)abc/ re&#62; /(*COMMIT)abc/
data&#62; xyzabc data&#62; xyzabc
0: abc 0: abc
data&#62; data&#62;
re&#62; /(*COMMIT)abc/no_start_optimize re&#62; /(*COMMIT)abc/no_start_optimize
data&#62; xyzabc data&#62; xyzabc
No match No match
@ -2989,7 +2989,7 @@ as (*COMMIT).
<P> <P>
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back to the It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK), caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) or (*THEN). ignoring those set by (*PRUNE) or (*THEN).
<pre> <pre>
(*SKIP) (*SKIP)
@ -3041,7 +3041,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
<P> <P>
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back to the It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK), caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) and (*THEN). ignoring those set by (*PRUNE) and (*THEN).
</P> </P>
<P> <P>

View File

@ -103,17 +103,17 @@ PCRE2 to use heap memory instead of stack for remembering back-up points when
of how to do this are given in the of how to do this are given in the
<a href="pcre2build.html"><b>pcre2build</b></a> <a href="pcre2build.html"><b>pcre2build</b></a>
documentation. When built in this way, instead of using the stack, PCRE2 documentation. When built in this way, instead of using the stack, PCRE2
gets memory for remembering backup points from the heap. By default, the memory gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system <b>malloc()</b> function, but you can arrange is obtained by calling the system <b>malloc()</b> function, but you can arrange
to supply your own memory management function. For details, see the section to supply your own memory management function. For details, see the section
entitled entitled
<a href="pcre2api.html#matchcontext">"The match context"</a> <a href="pcre2api.html#matchcontext">"The match context"</a>
in the in the
<a href="pcre2api.html"><b>pcre2api</b></a> <a href="pcre2api.html"><b>pcre2api</b></a>
documentation. Since the block sizes are always the same, it may be possible to documentation. Since the block sizes are always the same, it may be possible to
implement a customized memory handler that is more efficient than the standard implement a customized memory handler that is more efficient than the standard
function. The memory blocks obtained for this purpose are retained and re-used function. The memory blocks obtained for this purpose are retained and re-used
if possible while <b>pcre2_match()</b> is running. They are all freed just if possible while <b>pcre2_match()</b> is running. They are all freed just
before it exits. before it exits.
</P> </P>
<br><b> <br><b>

View File

@ -414,7 +414,7 @@ appear.
(*LIMIT_MATCH=d) set the match limit to d (decimal number) (*LIMIT_MATCH=d) set the match limit to d (decimal number)
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching (*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*UTF) set appropriate UTF mode for the library in use (*UTF) set appropriate UTF mode for the library in use

View File

@ -476,7 +476,7 @@ about the pattern:
/I info show info about compiled pattern /I info show info about compiled pattern
hex pattern is coded in hexadecimal hex pattern is coded in hexadecimal
jit[=&#60;number&#62;] use JIT jit[=&#60;number&#62;] use JIT
jitverify verify JIT use jitverify verify JIT use
locale=&#60;name&#62; use this locale locale=&#60;name&#62; use this locale
memory show memory used memory show memory used
newline=&#60;type&#62; set newline type newline=&#60;type&#62; set newline type
@ -565,7 +565,7 @@ number in the range 0 to 7:
7 all three modes 7 all three modes
</pre> </pre>
If no number is given, 7 is assumed. If JIT compilation is successful, the If no number is given, 7 is assumed. If JIT compilation is successful, the
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
for the appropriate type of match, except when incompatible run-time options for the appropriate type of match, except when incompatible run-time options
are specified. For more details, see the are specified. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a> <a href="pcre2jit.html"><b>pcre2jit</b></a>
@ -710,7 +710,7 @@ for a description of their effects.
partial_hard (or ph) set PCRE2_PARTIAL_HARD partial_hard (or ph) set PCRE2_PARTIAL_HARD
partial_soft (or ps) set PCRE2_PARTIAL_SOFT partial_soft (or ps) set PCRE2_PARTIAL_SOFT
</pre> </pre>
The partial matching modifiers are provided with abbreviations because they The partial matching modifiers are provided with abbreviations because they
appear frequently in tests. appear frequently in tests.
</P> </P>
<P> <P>
@ -892,8 +892,8 @@ until it finds the minimum values for each parameter that allow
<b>pcre2_match()</b> to complete without error. <b>pcre2_match()</b> to complete without error.
</P> </P>
<P> <P>
If JIT is being used, only the match limit is relevant. If DFA matching is If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a being used, neither limit is relevant, and this modifier is ignored (with a
warning message). warning message).
</P> </P>
<P> <P>
@ -939,10 +939,10 @@ appears, though of course it can also be used to set a default in a
available for storing matching information. The default is 15. available for storing matching information. The default is 15.
</P> </P>
<P> <P>
At least one pair of offsets is always created by At least one pair of offsets is always created by
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a <b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
vector. vector.
</P> </P>
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br> <br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>

View File

@ -67,7 +67,7 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit. single code unit.
</P> </P>
<P> <P>
The escape sequence \C can be used to match a single code unit, in a UTF mode, The escape sequence \C can be used to match a single code unit, in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \C in the characters (see the description of \C in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a> <a href="pcre2pattern.html"><b>pcre2pattern</b></a>
@ -114,8 +114,8 @@ VALIDITY OF UTF STRINGS
</b><br> </b><br>
<P> <P>
When the PCRE2_UTF option is set, the strings passed as patterns and subjects When the PCRE2_UTF option is set, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given. If an invalid UTF string is passed, an error return is given.
</P> </P>
<P> <P>
UTF-16 and UTF-32 strings can indicate their endianness by special code known UTF-16 and UTF-32 strings can indicate their endianness by special code known

View File

@ -23,11 +23,11 @@ of Unicode in use can be discovered by running
.sp .sp
pcre2test -C pcre2test -C
.P .P
The three libraries contain identical sets of functions, with names ending in The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However, _8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as one code unit width can be written using generic names such as
\fBpcre2_compile()\fP, and the documentation is written assuming that this is \fBpcre2_compile()\fP, and the documentation is written assuming that this is
the case. the case.
.P .P
In addition to the Perl-compatible matching function, PCRE2 contains an In addition to the Perl-compatible matching function, PCRE2 contains an

View File

@ -158,8 +158,8 @@ REVISION
Last updated: 28 September 2014 Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2API(3) Library Functions Manual PCRE2API(3) PCRE2API(3) Library Functions Manual PCRE2API(3)
@ -2529,8 +2529,8 @@ REVISION
Last updated: 16 October 2014 Last updated: 16 October 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3) PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
@ -2981,8 +2981,8 @@ REVISION
Last updated: 28 September 2014 Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3) PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
@ -3217,8 +3217,8 @@ REVISION
Last updated: 19 October 2014 Last updated: 19 October 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3) PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
@ -3403,8 +3403,8 @@ REVISION
Last updated: 28 September 2014 Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3) PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
@ -3758,8 +3758,8 @@ REVISION
Last updated: 29 September 2014 Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3) PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
@ -3826,8 +3826,8 @@ REVISION
Last updated: 29 September 2014 Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3) PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
@ -4045,8 +4045,8 @@ REVISION
Last updated: 29 September 2014 Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3) PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
@ -4485,8 +4485,8 @@ REVISION
Last updated: 14 October 2014 Last updated: 14 October 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3) PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
@ -4711,5 +4711,5 @@ REVISION
Last updated: 16 September 2014 Last updated: 16 September 2014
Copyright (c) 1997-2014 University of Cambridge. Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -250,7 +250,7 @@ to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is
unknown should also use the real function names. (Unfortunately, it is not unknown should also use the real function names. (Unfortunately, it is not
possible in C code to save and restore the value of a macro.) possible in C code to save and restore the value of a macro.)
.P .P
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
compiler error occurs. compiler error occurs.
.P .P
When using multiple libraries in an application, you must take care when When using multiple libraries in an application, you must take care when
@ -392,7 +392,7 @@ section on \fBpcre2_match()\fP options
below. below.
.P .P
The choice of newline convention does not affect the interpretation of The choice of newline convention does not affect the interpretation of
the \en or \er escape sequences, nor does it affect what \eR matches, which has the \en or \er escape sequences, nor does it affect what \eR matches, which has
its own separate control. its own separate control.
. .
. .
@ -509,7 +509,7 @@ The memory used for a general context should be freed by calling:
.SS "The compile context" .SS "The compile context"
.rs .rs
.sp .sp
A compile context is required if you want to change the default values of any A compile context is required if you want to change the default values of any
of the following compile-time parameters: of the following compile-time parameters:
.sp .sp
What \eR matches (Unicode newlines or CR, LF, CRLF only); What \eR matches (Unicode newlines or CR, LF, CRLF only);
@ -518,7 +518,7 @@ of the following compile-time parameters:
The compile time nested parentheses limit; The compile time nested parentheses limit;
An external function for stack checking. An external function for stack checking.
.sp .sp
A compile context is also required if you are using custom memory management. A compile context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of If none of these apply, just pass NULL as the context argument of
\fIpcre2_compile()\fP. \fIpcre2_compile()\fP.
.P .P
@ -534,8 +534,8 @@ A compile context is created, copied, and freed by the following functions:
.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP); .B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP);
.fi .fi
.sp .sp
A compile context is created with default values for its parameters. These can A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected. PCRE2_ERROR_BADDATA if invalid data is detected.
.sp .sp
.nf .nf
@ -543,11 +543,11 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
.B " uint32_t \fIvalue\fP);" .B " uint32_t \fIvalue\fP);"
.fi .fi
.sp .sp
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF, The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled; ending sequence. The value of this parameter does not affect what is compiled;
it is just saved with the compiled pattern. The value is used by the JIT it is just saved with the compiled pattern. The value is used by the JIT
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
\fIpcre2_dfa_match()\fP. \fIpcre2_dfa_match()\fP.
.sp .sp
.nf .nf
@ -555,7 +555,7 @@ compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
.B " const unsigned char *\fItables\fP);" .B " const unsigned char *\fItables\fP);"
.fi .fi
.sp .sp
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
argument is a general context. This function builds a set of character tables argument is a general context. This function builds a set of character tables
in the current locale. in the current locale.
.sp .sp
@ -564,9 +564,9 @@ in the current locale.
.B " uint32_t \fIvalue\fP);" .B " uint32_t \fIvalue\fP);"
.fi .fi
.sp .sp
This specifies which characters or character sequences are to be recognized as This specifies which characters or character sequences are to be recognized as
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only), newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
PCRE2_NEWLINE_ANY (any Unicode newline sequence). PCRE2_NEWLINE_ANY (any Unicode newline sequence).
.P .P
@ -591,7 +591,7 @@ using up too much system stack when being compiled.
.fi .fi
.sp .sp
There is at least one application that runs PCRE2 in threads with very limited There is at least one application that runs PCRE2 in threads with very limited
system stack, where running out of stack is to be avoided at all costs. The system stack, where running out of stack is to be avoided at all costs. The
parenthesis limit above cannot take account of how much stack is actually parenthesis limit above cannot take account of how much stack is actually
available. For a finer control, you can supply a function that is called available. For a finer control, you can supply a function that is called
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
@ -603,20 +603,20 @@ function should return zero if all is well, or non-zero to force an error.
.SS "The match context" .SS "The match context"
.rs .rs
.sp .sp
A match context is required if you want to change the default values of any A match context is required if you want to change the default values of any
of the following match-time parameters: of the following match-time parameters:
.sp .sp
What \eR matches (Unicode newlines or CR, LF, CRLF only); What \eR matches (Unicode newlines or CR, LF, CRLF only);
A callout function; A callout function;
The limit for calling \fImatch()\fP; The limit for calling \fImatch()\fP;
The limit for calling \fImatch()\fP recursively; The limit for calling \fImatch()\fP recursively;
The newline character sequence; The newline character sequence;
.sp .sp
A match context is also required if you are using custom memory management. A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of If none of these apply, just pass NULL as the context argument of
\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP. \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP.
Changing the newline value or what \eR matches at match time disables the use Changing the newline value or what \eR matches at match time disables the use
of JIT via \fBpcre2_match()\fP. of JIT via \fBpcre2_match()\fP.
.P .P
A match context is created, copied, and freed by the following functions: A match context is created, copied, and freed by the following functions:
.sp .sp
@ -630,8 +630,8 @@ A match context is created, copied, and freed by the following functions:
.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP); .B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP);
.fi .fi
.sp .sp
A match context is created with default values for its parameters. These can A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected. PCRE2_ERROR_BADDATA if invalid data is detected.
.sp .sp
.nf .nf
@ -662,7 +662,7 @@ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
imposed on the number of times this function is called during a match, which imposed on the number of times this function is called during a match, which
has the effect of limiting the amount of backtracking that can take place. For has the effect of limiting the amount of backtracking that can take place. For
patterns that are not anchored, the count restarts from zero for each position patterns that are not anchored, the count restarts from zero for each position
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP, in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
which ignores it. which ignores it.
.P .P
When \fBpcre2_match()\fP is called with a pattern that was successfully studied When \fBpcre2_match()\fP is called with a pattern that was successfully studied
@ -698,7 +698,7 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP.
Limiting the recursion depth limits the amount of system stack that can be Limiting the recursion depth limits the amount of system stack that can be
used, or, when PCRE2 has been compiled to use memory on the heap instead of the used, or, when PCRE2 has been compiled to use memory on the heap instead of the
stack, the amount of heap memory that can be used. This limit is not relevant, stack, the amount of heap memory that can be used. This limit is not relevant,
and is ignored, when matching is done using JIT compiled code or by the and is ignored, when matching is done using JIT compiled code or by the
\fBpcre2_dfa_match()\fP function. \fBpcre2_dfa_match()\fP function.
.P .P
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
@ -720,9 +720,9 @@ limit is set, less than the default.
.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);" .B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
.fi .fi
.sp .sp
This function sets up two additional custom memory management functions for use This function sets up two additional custom memory management functions for use
by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering
backtracking data, instead of recursive function calls that use the system backtracking data, instead of recursive function calls that use the system
stack. There is a discussion about PCRE2's stack usage in the stack. There is a discussion about PCRE2's stack usage in the
.\" HREF .\" HREF
\fBpcre2stack\fP \fBpcre2stack\fP
@ -738,7 +738,7 @@ limited stacks. Because of the greater use of memory management,
general custom memory functions are provided so that special-purpose external general custom memory functions are provided so that special-purpose external
code can be used for this case, because the memory blocks are all the same code can be used for this case, because the memory blocks are all the same
size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit
so that they can be re-used when possible during the match. In the absence of so that they can be re-used when possible during the match. In the absence of
these functions, the normal custom memory management functions are used, if these functions, the normal custom memory management functions are used, if
supplied, otherwise the system functions. supplied, otherwise the system functions.
. .
@ -760,7 +760,7 @@ required. The second argument is a pointer to memory into which the information
is placed. If NULL is passed, the function returns the amount of memory that is is placed. If NULL is passed, the function returns the amount of memory that is
needed for the requested information. For calls that return numerical values, needed for the requested information. For calls that return numerical values,
the value is in bytes; when requesting these values, \fIwhere\fP should point the value is in bytes; when requesting these values, \fIwhere\fP should point
to appropriately aligned memory. For calls that return strings, the required to appropriately aligned memory. For calls that return strings, the required
length is given in code units, not counting the terminating zero. length is given in code units, not counting the terminating zero.
.P .P
When requesting information, the returned value from \fBpcre2_config()\fP is When requesting information, the returned value from \fBpcre2_config()\fP is
@ -783,7 +783,7 @@ compiling is available; otherwise it is set to zero.
PCRE2_CONFIG_JITTARGET PCRE2_CONFIG_JITTARGET
.sp .sp
The \fIwhere\fP argument should point to a buffer that is at least 48 code The \fIwhere\fP argument should point to a buffer that is at least 48 code
units long. (The exact length needed can be found by calling units long. (The exact length needed can be found by calling
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a
string that contains the name of the architecture for which the JIT compiler is string that contains the name of the architecture for which the JIT compiler is
configured, for example "x86 32bit (little endian + unaligned)". If JIT support configured, for example "x86 32bit (little endian + unaligned)". If JIT support
@ -794,9 +794,9 @@ the string, in code units, is returned.
.sp .sp
The output is an integer that contains the number of bytes used for internal The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. When PCRE2 is configured, the value linkage in compiled regular expressions. When PCRE2 is configured, the value
can be set to 2, 3, or 4, with the default being 2. This is the value that is can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled, returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled, a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
internal linkages always use 4 bytes, so the configured value is not relevant. internal linkages always use 4 bytes, so the configured value is not relevant.
.P .P
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
@ -820,7 +820,7 @@ that is recognized as meaning "newline". The values are:
3 Carriage return, linefeed (CRLF) 3 Carriage return, linefeed (CRLF)
4 Any Unicode line ending 4 Any Unicode line ending
5 Any of CR, LF, or CRLF 5 Any of CR, LF, or CRLF
.sp .sp
The default should normally correspond to the standard sequence for your The default should normally correspond to the standard sequence for your
operating system. operating system.
.sp .sp
@ -849,7 +849,7 @@ compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
heap instead of recursive function calls. heap instead of recursive function calls.
.sp .sp
PCRE2_CONFIG_UNICODE_VERSION PCRE2_CONFIG_UNICODE_VERSION
.sp .sp
The \fIwhere\fP argument should point to a buffer that is at least 24 code The \fIwhere\fP argument should point to a buffer that is at least 24 code
units long. (The exact length needed can be found by calling units long. (The exact length needed can be found by calling
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled \fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled
@ -884,15 +884,15 @@ units) is returned.
.B pcre2_code_free(pcre2_code *\fIcode\fP); .B pcre2_code_free(pcre2_code *\fIcode\fP);
.fi .fi
.P .P
This function compiles a pattern, defined by a pointer to a string of code This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated, units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
it is no longer needed. it is no longer needed.
.P .P
If the compile context argument \fIccontext\fP is NULL, the memory is obtained If the compile context argument \fIccontext\fP is NULL, the memory is obtained
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
function that was used for the compile context. function that was used for the compile context.
.P .P
The \fIoptions\fP argument contains various bit settings that affect the The \fIoptions\fP argument contains various bit settings that affect the
@ -903,14 +903,14 @@ within the pattern (see the detailed description in the
.\" HREF .\" HREF
\fBpcre2pattern\fP \fBpcre2pattern\fP
.\" .\"
documentation). documentation).
.P .P
For those options that can be different in different parts of the pattern, the For those options that can be different in different parts of the pattern, the
contents of the \fIoptions\fP argument specify their settings at the start of contents of the \fIoptions\fP argument specify their settings at the start of
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
the time of matching as well as at compile time. the time of matching as well as at compile time.
.P .P
Other, less frequently required compile-time parameters (for example, the Other, less frequently required compile-time parameters (for example, the
newline setting) can be provided in a compile context (as described newline setting) can be provided in a compile context (as described
.\" HTML <a href="#compilecontext"> .\" HTML <a href="#compilecontext">
.\" </a> .\" </a>
@ -936,10 +936,10 @@ This code fragment shows a typical straightforward call to
.sp .sp
pcre2_code *re; pcre2_code *re;
PCRE2_SIZE erroffset; PCRE2_SIZE erroffset;
int errorcode; int errorcode;
re = pcre2_compile( re = pcre2_compile(
"^A.*Z", /* the pattern */ "^A.*Z", /* the pattern */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */ PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
0, /* default options */ 0, /* default options */
&errorcode, /* for error code */ &errorcode, /* for error code */
&erroffset, /* for error offset */ &erroffset, /* for error offset */
@ -958,14 +958,14 @@ Perl.
.sp .sp
PCRE2_ALLOW_EMPTY_CLASS PCRE2_ALLOW_EMPTY_CLASS
.sp .sp
By default, for compatibility with Perl, a closing square bracket that By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match. therefore contains no characters and so can never match.
.sp .sp
PCRE2_ALT_BSUX PCRE2_ALT_BSUX
.sp .sp
This option requests alternative handling of three escape sequences, which This option requests alternative handling of three escape sequences, which
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set: makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
.P .P
(1) \eU matches an upper case "U" character; by default \eU causes a compile (1) \eU matches an upper case "U" character; by default \eU causes a compile
@ -996,7 +996,7 @@ documentation.
.sp .sp
If this bit is set, letters in the pattern match both upper and lower case If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting. changed within a pattern by a (?i) option setting.
.sp .sp
PCRE2_DOLLAR_ENDONLY PCRE2_DOLLAR_ENDONLY
.sp .sp
@ -1052,7 +1052,7 @@ sequence at the start of the pattern, as described in the section entitled
.\" </a> .\" </a>
"Newline conventions" "Newline conventions"
.\" .\"
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
built. built.
.sp .sp
PCRE2_FIRSTLINE PCRE2_FIRSTLINE
@ -1067,7 +1067,7 @@ If this option is set, a back reference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail). empty string (by default this causes the current matching alternative to fail).
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl find an "a" in the subject), whereas it fails by default, for Perl
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
JavaScript). JavaScript).
.sp .sp
PCRE2_MULTILINE PCRE2_MULTILINE
@ -1091,10 +1091,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
PCRE2_NEVER_UCP PCRE2_NEVER_UCP
.sp .sp
This option locks out the use of Unicode properties for handling \eB, \eb, \eD, This option locks out the use of Unicode properties for handling \eB, \eb, \eD,
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described \ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The may be useful in applications that process patterns from external sources. The
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error. option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
.sp .sp
PCRE2_NEVER_UTF PCRE2_NEVER_UTF
@ -1167,7 +1167,7 @@ pattern
(*MARK:A)(X|Y) (*MARK:A)(X|Y)
.sp .sp
The minimum length for a match is one character. If the subject is "ABC", there The minimum length for a match is one character. If the subject is "ABC", there
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
string at the end of the subject does not take place, because PCRE2 knows that string at the end of the subject does not take place, because PCRE2 knows that
the subject is now too short, and so the (*MARK) is never encountered. In this the subject is now too short, and so the (*MARK) is never encountered. In this
case, the optimization does not affect the overall match result, which is still case, the optimization does not affect the overall match result, which is still
@ -1194,7 +1194,7 @@ in the
.\" HREF .\" HREF
\fBpcre2unicode\fP \fBpcre2unicode\fP
.\" .\"
document. document.
If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative
error code. error code.
.P .P
@ -1385,9 +1385,9 @@ The possible values for the second argument are defined in \fBpcre2.h\fP, and
are as follows: are as follows:
.sp .sp
PCRE2_INFO_ALLOPTIONS PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS PCRE2_INFO_ARGOPTIONS
.sp .sp
Return a copy of the pattern's options. The third argument should point to a Return a copy of the pattern's options. The third argument should point to a
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that \fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level option settings at the start the compile options as modified by any top-level option settings at the start
@ -1406,7 +1406,7 @@ alternatives begin with one of the following:
.* if PCRE2_DOTALL is set and there are no back .* if PCRE2_DOTALL is set and there are no back
references to the subpattern in which .* appears references to the subpattern in which .* appears
.sp .sp
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
PCRE2_INFO_ALLOPTIONS. PCRE2_INFO_ALLOPTIONS.
.sp .sp
PCRE2_INFO_BACKREFMAX PCRE2_INFO_BACKREFMAX
@ -1490,7 +1490,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
.sp .sp
Returns 1 if there is a rightmost literal code unit that must exist in any Returns 1 if there is a rightmost literal code unit that must exist in any
matched string, other than at its start. The third argument should point to an matched string, other than at its start. The third argument should point to an
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
returned, the code unit value itself can be retrieved using returned, the code unit value itself can be retrieved using
PCRE2_INFO_LASTCODEUNIT. PCRE2_INFO_LASTCODEUNIT.
.P .P
@ -1617,7 +1617,7 @@ values are:
3 Carriage return, linefeed (CRLF) 3 Carriage return, linefeed (CRLF)
4 Any Unicode line ending 4 Any Unicode line ending
5 Any of CR, LF, or CRLF 5 Any of CR, LF, or CRLF
.sp .sp
The default can be overridden when a pattern is matched. The default can be overridden when a pattern is matched.
.sp .sp
PCRE2_INFO_RECURSIONLIMIT PCRE2_INFO_RECURSIONLIMIT
@ -1652,11 +1652,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP); .B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP);
.fi .fi
.P .P
Information about successful and unsuccessful matches is placed in a match Information about successful and unsuccessful matches is placed in a match
data block, which is an opaque structure that is accessed by function calls. In data block, which is an opaque structure that is accessed by function calls. In
particular, the match data block contains a vector of offsets into the subject particular, the match data block contains a vector of offsets into the subject
string that define the matched part of the subject and any substrings that were string that define the matched part of the subject and any substrings that were
captured. This is known as the \fIovector\fP. captured. This is known as the \fIovector\fP.
.P .P
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
match data block by calling one of the creation functions above. For match data block by calling one of the creation functions above. For
@ -1669,11 +1669,11 @@ pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to
return the overall matched string. return the overall matched string.
.P .P
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
pointer to a compiled pattern. In this case the ovector is created to be pointer to a compiled pattern. In this case the ovector is created to be
exactly the right size to hold all the substrings a pattern might capture. exactly the right size to hold all the substrings a pattern might capture.
.P .P
The second argument of both these functions is a pointer to a general context, The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the which can specify custom memory management for obtaining the memory for the
match data block. If you are not using custom memory management, pass NULL. match data block. If you are not using custom memory management, pass NULL.
.P .P
A match data block can be used many times, with the same or different compiled A match data block can be used many times, with the same or different compiled
@ -1729,8 +1729,8 @@ Here is an example of a simple call to \fBpcre2_match()\fP:
match_data, /* the match data block */ match_data, /* the match data block */
NULL); /* a match context; NULL means use defaults */ NULL); /* a match context; NULL means use defaults */
.sp .sp
If the subject string is zero-terminated, the length can be given as If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
matching parameters are to be changed. For details, see the section on matching parameters are to be changed. For details, see the section on
.\" HTML <a href="#matchcontext"> .\" HTML <a href="#matchcontext">
.\" </a> .\" </a>
@ -1746,7 +1746,7 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
\fIsubject\fP, a length in \fIlength\fP, and a starting offset in \fIsubject\fP, a length in \fIlength\fP, and a starting offset in
\fIstartoffset\fP. The length and offset are in code units, not characters. \fIstartoffset\fP. The length and offset are in code units, not characters.
That is, they are in bytes for the 8-bit library, 16-bit code units for the That is, they are in bytes for the 8-bit library, 16-bit code units for the
16-bit library, and 32-bit code units for the 32-bit library, whether or not 16-bit library, and 32-bit code units for the 32-bit library, whether or not
UTF processing is enabled. UTF processing is enabled.
.P .P
If \fIstartoffset\fP is greater than the length of the subject, If \fIstartoffset\fP is greater than the length of the subject,
@ -1755,7 +1755,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32 must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the mode, one code unit equals one character, so all offsets are valid). Like the
pattern string, the subject may contain binary zeroes. pattern string, the subject may contain binary zeroes.
.P .P
A non-zero starting offset is useful when searching for another match in the A non-zero starting offset is useful when searching for another match in the
same subject by calling \fBpcre2_match()\fP again after a previous success. same subject by calling \fBpcre2_match()\fP again after a previous success.
@ -1816,7 +1816,7 @@ JIT matching is disabled and the normal interpretive code in
The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT matching time. Note that setting the option at match time disables JIT
matching. matching.
.sp .sp
PCRE2_NOTBOL PCRE2_NOTBOL
@ -1880,13 +1880,13 @@ in the
.\" HREF .\" HREF
\fBpcre2unicode\fP \fBpcre2unicode\fP
.\" .\"
page. page.
.P .P
If you know that your subject is valid, and you want to skip these checks for If you know that your subject is valid, and you want to skip these checks for
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
\fBpcre2_match()\fP. You might want to do this for the second and subsequent \fBpcre2_match()\fP. You might want to do this for the second and subsequent
calls to \fBpcre2_match()\fP if you are making repeated calls to find all the calls to \fBpcre2_match()\fP if you are making repeated calls to find all the
matches in a single subject string. matches in a single subject string.
.P .P
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your
@ -1921,10 +1921,10 @@ documentation.
. .
.SH "NEWLINE HANDLING WHEN MATCHING" .SH "NEWLINE HANDLING WHEN MATCHING"
.rs .rs
.sp .sp
When PCRE2 is built, a default newline convention is set; this is usually the When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in standard convention for the operating system. The default can be overridden in
either a either a
.\" HTML <a href="#compilecontext"> .\" HTML <a href="#compilecontext">
.\" </a> .\" </a>
compile context compile context
@ -1972,7 +1972,7 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern.
.fi .fi
.P .P
In general, a pattern matches a certain portion of the subject, and in In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
book, this is called "capturing" in what follows, and the phrase "capturing book, this is called "capturing" in what follows, and the phrase "capturing
subpattern" is used for a fragment of a pattern that picks out a substring. subpattern" is used for a fragment of a pattern that picks out a substring.
@ -1982,14 +1982,14 @@ used to find out how many capturing subpatterns there are in a compiled
pattern. pattern.
.P .P
The overall matched string and any captured substrings are returned to the The overall matched string and any captured substrings are returned to the
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
contained within the contained within the
.\" HTML <a href="#matchdatablock"> .\" HTML <a href="#matchdatablock">
.\" </a> .\" </a>
match data block. match data block.
.\" .\"
You can obtain direct access to the ovector by calling You can obtain direct access to the ovector by calling
\fBpcre2_get_ovector_pointer()\fP to find its address, and \fBpcre2_get_ovector_pointer()\fP to find its address, and
\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it \fBpcre2_get_ovector_count()\fP to find the number of pairs of values it
contains. Alternatively, you can use the auxiliary functions for accessing contains. Alternatively, you can use the auxiliary functions for accessing
captured substrings captured substrings
@ -2065,17 +2065,17 @@ had.
.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP); .B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
.fi .fi
.P .P
In addition to the offsets in the ovector, other information about a match is In addition to the offsets in the ovector, other information about a match is
retained in the match data block and can be retrieved by the above functions. retained in the match data block and can be retrieved by the above functions.
.P .P
When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a
pointer to the zero-terminated name, which is within the compiled pattern. pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed Otherwise NULL is returned. A (*MARK) name may be available after a failed
match or a partial match, as well as after a successful one. match or a partial match, as well as after a successful one.
.P .P
The offset of the character at which the successful match started is The offset of the character at which the successful match started is
returned by \fBpcre2_get_startchar()\fP. This can be different to the value of returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note, \fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
however, the \eK has no effect for a partial match. however, the \eK has no effect for a partial match.
. .
. .
@ -2083,7 +2083,7 @@ however, the \eK has no effect for a partial match.
.SS "Error return values from \fBpcre2_match()\fP" .SS "Error return values from \fBpcre2_match()\fP"
.rs .rs
.sp .sp
If \fBpcre2_match()\fP fails, it returns a negative number. This can be If \fBpcre2_match()\fP fails, it returns a negative number. This can be
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
error codes are also returned by other functions, and are documented with them. error codes are also returned by other functions, and are documented with them.
The codes are given names in the header file. If UTF checking is in force and The codes are given names in the header file. If UTF checking is in force and
@ -2237,7 +2237,7 @@ extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first
argument is a pointer to the match data block, the second is the group number, argument is a pointer to the match data block, the second is the group number,
and the third is a pointer to a variable into which the length is placed. and the third is a pointer to a variable into which the length is placed.
.P .P
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into
new memory, obtained using the same memory allocation function that was used new memory, obtained using the same memory allocation function that was used
for the match data block. The first two arguments of these functions are a for the match data block. The first two arguments of these functions are a
@ -2250,10 +2250,10 @@ the buffer and a pointer to a variable that contains its length in code units.
This is updated to contain the actual number of code units used, excluding the This is updated to contain the actual number of code units used, excluding the
terminating zero. terminating zero.
.P .P
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by zero. When the substring is no longer needed, the memory should be freed by
calling \fBpcre2_substring_free()\fP. calling \fBpcre2_substring_free()\fP.
.P .P
The return value from these functions is zero for success, or one of these The return value from these functions is zero for success, or one of these
@ -2266,9 +2266,9 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP.
.sp .sp
PCRE2_ERROR_NOSUBSTRING PCRE2_ERROR_NOSUBSTRING
.sp .sp
No substring with the given number was captured. This could be because there is No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too that number did not participate in the match, or because the ovector was too
small to capture that group. small to capture that group.
. .
. .
@ -2284,7 +2284,7 @@ small to capture that group.
.P .P
The \fBpcre2_substring_list_get()\fP function extracts all available substrings The \fBpcre2_substring_list_get()\fP function extracts all available substrings
and builds a list of pointers to them, and a second list that contains their and builds a list of pointers to them, and a second list that contains their
lengths (in code units), excluding a terminating zero that is added to each of lengths (in code units), excluding a terminating zero that is added to each of
them. All this is done in a single block of memory that is obtained using the them. All this is done in a single block of memory that is obtained using the
same memory allocation function that was used to get the match data block. same memory allocation function that was used to get the match data block.
.P .P
@ -2295,7 +2295,7 @@ NULL pointer. The address of the list of lengths is returned via
therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP
argument to disable the creation of a list of lengths. The yield of the argument to disable the creation of a list of lengths. The yield of the
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
could not be obtained. When the list is no longer needed, it should be freed by could not be obtained. When the list is no longer needed, it should be freed by
calling \fBpcre2_substring_list_free()\fP. calling \fBpcre2_substring_list_free()\fP.
.P .P
If this function encounters a substring that is unset, which can happen when If this function encounters a substring that is unset, which can happen when
@ -2340,7 +2340,7 @@ name.
.P .P
Given the number, you can extract the substring directly, or use one of the Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also functions described in the previous section. For convenience, there are also
"byname" functions that correspond to the "bynumber" functions, the only "byname" functions that correspond to the "bynumber" functions, the only
difference being that the second argument is a name instead of a number. difference being that the second argument is a name instead of a number.
However, if PCRE2_DUPNAMES is set and there are duplicate names, However, if PCRE2_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section). the behaviour may not be what you want (see the next section).
@ -2413,7 +2413,7 @@ numbers, and hence the captured data.
.sp .sp
The traditional matching function uses a similar algorithm to Perl, which stops The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match at a given want to find all possible matches, or the longest possible match at a given
position, consider using the alternative matching function (see below) instead. position, consider using the alternative matching function (see below) instead.
If you cannot use the alternative function, you can kludge it up by making use If you cannot use the alternative function, you can kludge it up by making use
of the callout facility, which is described in the of the callout facility, which is described in the
@ -2614,8 +2614,8 @@ fail, this error is given.
.SH "SEE ALSO" .SH "SEE ALSO"
.rs .rs
.sp .sp
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3), \fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3), \fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
\fBpcre2demo\fP(3), \fBpcre2sample\fP(3), \fBpcre2stack\fP(3). \fBpcre2demo\fP(3), \fBpcre2sample\fP(3), \fBpcre2stack\fP(3).
. .
. .

View File

@ -71,11 +71,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
strings that are contained in vectors of 16-bit and 32-bit code units, strings that are contained in vectors of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters or respectively. These can be interpreted either as single-unit characters or
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
the following to the \fBconfigure\fP command: the following to the \fBconfigure\fP command:
.sp .sp
--enable-pcre16 --enable-pcre16
--enable-pcre32 --enable-pcre32
.sp .sp
If you do not want the 8-bit library, add If you do not want the 8-bit library, add
.sp .sp
@ -367,7 +367,7 @@ override this value by specifying a run-time option.
If you add one of If you add one of
.sp .sp
--enable-pcre2test-libreadline --enable-pcre2test-libreadline
--enable-pcre2test-libedit --enable-pcre2test-libedit
.sp .sp
to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is \fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
@ -384,8 +384,8 @@ unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
this: this:
.sp .sp
"Readline uses the termcap functions, but does not link with "Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library." which link with readline the to choose an appropriate library."
.sp .sp
If your environment has not been set up so that an appropriate library is If your environment has not been set up so that an appropriate library is

View File

@ -16,9 +16,9 @@ PCRE2 provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE2 in the middle of pattern matching. The passing control to the caller of PCRE2 in the middle of pattern matching. The
caller of PCRE2 provides an external function by putting its entry point in caller of PCRE2 provides an external function by putting its entry point in
a match context (see \fBpcre2_set_callout()\fP in the a match context (see \fBpcre2_set_callout()\fP in the
.\" HREF .\" HREF
\fBpcre2api\fP \fBpcre2api\fP
.\" .\"
documentation). documentation).
.P .P
Within a regular expression, (?C) indicates the points at which the external Within a regular expression, (?C) indicates the points at which the external

View File

@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API. incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library. width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system In Unix-like environments, if PCRE2 is installed in your standard system
@ -56,8 +56,8 @@ the following line. */
/* #define PCRE2_STATIC */ /* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses /* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */ such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8 #define PCRE2_CODE_UNIT_WIDTH 8
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile( re = pcre2_compile(
pattern, /* the pattern */ pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */ 0, /* default options */
&errornumber, /* for error number */ &errornumber, /* for error number */
&erroroffset, /* for error offset */ &erroroffset, /* for error offset */
@ -151,9 +151,9 @@ re = pcre2_compile(
if (re == NULL) if (re == NULL)
{ {
PCRE2_UCHAR buffer[256]; PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset, printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
buffer); buffer);
return 1; return 1;
} }
@ -197,7 +197,7 @@ if (rc < 0)
return 1; return 1;
} }
/* Match succeeded. Get a pointer to the output vector, where string offsets are /* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */ stored. */
ovector = pcre2_get_ovector_pointer(match_data); ovector = pcre2_get_ovector_pointer(match_data);
@ -210,7 +210,7 @@ printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
* captured. * * captured. *
*************************************************************************/ *************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used /* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */ pcre2_match_data_create_from_pattern() above. */
if (rc == 0) if (rc == 0)
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
&name_entry_size); /* where to put the answer */ &name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name, /* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */ bytes, most significant first. */
tabptr = name_table; tabptr = name_table;
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
if (!find_all) /* Check for -g */ if (!find_all) /* Check for -g */
{ {
pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */ pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */ return 0; /* Exit the program. */
} }
@ -324,7 +324,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY || crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF || newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF; newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */ /* Loop for second and subsequent matches */

View File

@ -48,10 +48,10 @@ performance, there is also a "fast path" API that is JIT-specific.
.SH "SIMPLE USE OF JIT" .SH "SIMPLE USE OF JIT"
.rs .rs
.sp .sp
To make use of the JIT support in the simplest way, all you have to do is to To make use of the JIT support in the simplest way, all you have to do is to
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
\fBpcre2_compile()\fP. This function has two arguments: the first is the \fBpcre2_compile()\fP. This function has two arguments: the first is the
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
second is a set of option bits, which must include at least one of second is a set of option bits, which must include at least one of
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT. PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
.P .P
@ -221,7 +221,7 @@ non-default JIT stacks might operate:
.sp .sp
All the functions described in this section do nothing if JIT is not available, All the functions described in this section do nothing if JIT is not available,
and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
processed by \fBpcre2_jit_compile()\fP. processed by \fBpcre2_jit_compile()\fP.
. .
. .
@ -302,18 +302,18 @@ callback.
.sp .sp
int rc; int rc;
pcre2_code *re; pcre2_code *re;
pcre2_match_data *match_data; pcre2_match_data *match_data;
pcre2_jit_stack *jit_stack; pcre2_jit_stack *jit_stack;
.sp .sp
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
&errornumber, &erroffset, NULL); &errornumber, &erroffset, NULL);
/* Check for errors */ /* Check for errors */
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE); rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
/* Check for errors */ /* Check for errors */
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024); jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
/* Check for error (NULL) */ /* Check for error (NULL) */
pcre2_jit_stack_assign(re, NULL, jit_stack); pcre2_jit_stack_assign(re, NULL, jit_stack);
match_data = pcre2_match_data_create(re, 10); match_data = pcre2_match_data_create(re, 10);
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL); rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
/* Check results */ /* Check results */
pcre2_free(re); pcre2_free(re);

View File

@ -64,15 +64,15 @@ matched; without such a restriction there would always be a partial match of an
empty string at the end of the subject. empty string at the end of the subject.
.P .P
When a partial match is returned, the first two elements in the ovector point When a partial match is returned, the first two elements in the ovector point
to the portion of the subject that was matched. The appearance of \eK in the to the portion of the subject that was matched. The appearance of \eK in the
pattern has no effect for a partial match. Consider this pattern: pattern has no effect for a partial match. Consider this pattern:
.sp .sp
/abc\eK123/ /abc\eK123/
.sp .sp
If it is matched against "456abc123xyz" the result is a complete match, and the If it is matched against "456abc123xyz" the result is a complete match, and the
ovector defines the matched string as "123", because \eK resets the "start of ovector defines the matched string as "123", because \eK resets the "start of
match" point. However, if a partial match is requested and the subject string match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all is "456abc12", a partial match is found for the string "abc12", because all
these characters are needed for a subsequent re-match with additional these characters are needed for a subsequent re-match with additional
characters. characters.
.P .P
@ -316,14 +316,14 @@ same point as before.
For example, if the pattern "(?<=123)abc" is partially matched against the For example, if the pattern "(?<=123)abc" is partially matched against the
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
lookbehind count is 3, so all characters before offset 2 can be discarded. The lookbehind count is 3, so all characters before offset 2 can be discarded. The
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
displays a partial match, it indicates the lookbehind characters with '<' displays a partial match, it indicates the lookbehind characters with '<'
characters: characters:
.sp .sp
re> "(?<=123)abc" re> "(?<=123)abc"
data> xx123ab\e=ph data> xx123ab\e=ph
Partial match: 123ab Partial match: 123ab
<<< <<<
.P .P
3. Because a partial match must always contain at least one character, what 3. Because a partial match must always contain at least one character, what
might be considered a partial match of an empty string actually gives a "no might be considered a partial match of an empty string actually gives a "no

View File

@ -118,7 +118,7 @@ page has
.\" </a> .\" </a>
further discussion further discussion
.\" .\"
about newlines, and shows how to set the newline convention when calling about newlines, and shows how to set the newline convention when calling
\fBpcre2_compile()\fP. \fBpcre2_compile()\fP.
.P .P
It is also possible to specify a newline convention by starting a pattern It is also possible to specify a newline convention by starting a pattern
@ -196,7 +196,7 @@ corresponding characters in the subject. As a trivial example, the pattern
.sp .sp
matches a portion of a subject string that is identical to itself. When matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE2_CASELESS option), letters are matched caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case. independently of case.
.P .P
The power of regular expressions comes from the ability to include alternatives The power of regular expressions comes from the ability to include alternatives
and repetitions in the pattern. These are encoded in the pattern by the use of and repetitions in the pattern. These are encoded in the pattern by the use of
@ -1199,8 +1199,8 @@ An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default. square bracket. A closing square bracket on its own is not special by default.
If a closing square bracket is required as a member of the class, it should be If a closing square bracket is required as a member of the class, it should be
the first data character in the class (after an initial circumflex, if present) the first data character in the class (after an initial circumflex, if present)
or escaped with a backslash. This means that, by default, an empty class cannot or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
square bracket at the start does end the (empty) class. square bracket at the start does end the (empty) class.
.P .P
A character class matches a single character in the subject. A matched A character class matches a single character in the subject. A matched
@ -1221,7 +1221,7 @@ string.
When caseless matching is set, any letters in a class represent both their When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a "A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would. caseful version would.
.P .P
Characters that might indicate line breaks are never treated in any special way Characters that might indicate line breaks are never treated in any special way
when matching character classes, whatever line-ending sequence is in use, and when matching character classes, whatever line-ending sequence is in use, and
@ -1340,7 +1340,7 @@ classes by other sequences, as follows:
[:alnum:] becomes \ep{Xan} [:alnum:] becomes \ep{Xan}
[:alpha:] becomes \ep{L} [:alpha:] becomes \ep{L}
[:blank:] becomes \eh [:blank:] becomes \eh
[:cntrl:] becomes \ep{Cc} [:cntrl:] becomes \ep{Cc}
[:digit:] becomes \ep{Nd} [:digit:] becomes \ep{Nd}
[:lower:] becomes \ep{Ll} [:lower:] becomes \ep{Ll}
[:space:] becomes \ep{Xps} [:space:] becomes \ep{Xps}
@ -1496,7 +1496,7 @@ match "cataract", "erpillar" or an empty string.
.sp .sp
2. It sets up the subpattern as a capturing subpattern. This means that, when 2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, the portion of the subject string that matched the the whole pattern matches, the portion of the subject string that matched the
subpattern is passed back to the caller, separately from the portion that subpattern is passed back to the caller, separately from the portion that
matched the whole pattern. (This applies only to the traditional matching matched the whole pattern. (This applies only to the traditional matching
function; the DFA matching function does not support capturing.) function; the DFA matching function does not support capturing.)
.P .P
@ -1916,7 +1916,7 @@ at release 5.10.
PCRE2 has an optimization that automatically "possessifies" certain simple PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow. there is no point in backtracking into a sequence of A's when B must follow.
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS). the pattern with (*NO_AUTO_POSSESS).
.P .P
When a pattern contains an unlimited repeat inside a subpattern that can itself When a pattern contains an unlimited repeat inside a subpattern that can itself
@ -2238,7 +2238,7 @@ if the pattern is written as
.sp .sp
^.*+(?<=abcd) ^.*+(?<=abcd)
.sp .sp
there can be no backtracking for the .*+ item because of the possessive there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbehind quantifier; it can match only the entire string. The subsequent lookbehind
assertion does a single test on the last four characters. If it fails, the assertion does a single test on the last four characters. If it fails, the
match fails immediately. For long strings, this approach makes a significant match fails immediately. For long strings, this approach makes a significant
@ -2754,8 +2754,8 @@ same pair of parentheses when there is a repetition.
.P .P
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE2 provides an external code. The feature is called "callout". The caller of PCRE2 provides an external
function by putting its entry point in a match context using the function function by putting its entry point in a match context using the function
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or \fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
\fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry \fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry
point is set to NULL, callouts are disabled. point is set to NULL, callouts are disabled.
.P .P
@ -3008,7 +3008,7 @@ output from \fBpcre2test\fP:
re> /(*COMMIT)abc/ re> /(*COMMIT)abc/
data> xyzabc data> xyzabc
0: abc 0: abc
data> data>
re> /(*COMMIT)abc/no_start_optimize re> /(*COMMIT)abc/no_start_optimize
data> xyzabc data> xyzabc
No match No match
@ -3035,7 +3035,7 @@ as (*COMMIT).
.P .P
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE). The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back to the It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK), caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) or (*THEN). ignoring those set by (*PRUNE) or (*THEN).
.sp .sp
(*SKIP) (*SKIP)
@ -3085,7 +3085,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
.P .P
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN). The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back to the It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK), caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) and (*THEN). ignoring those set by (*PRUNE) and (*THEN).
.P .P
A subpattern that does not contain a | character is just a part of the A subpattern that does not contain a | character is just a part of the

View File

@ -90,10 +90,10 @@ of how to do this are given in the
\fBpcre2build\fP \fBpcre2build\fP
.\" .\"
documentation. When built in this way, instead of using the stack, PCRE2 documentation. When built in this way, instead of using the stack, PCRE2
gets memory for remembering backup points from the heap. By default, the memory gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system \fBmalloc()\fP function, but you can arrange is obtained by calling the system \fBmalloc()\fP function, but you can arrange
to supply your own memory management function. For details, see the section to supply your own memory management function. For details, see the section
entitled entitled
.\" HTML <a href="pcre2api.html#matchcontext"> .\" HTML <a href="pcre2api.html#matchcontext">
.\" </a> .\" </a>
"The match context" "The match context"
@ -104,8 +104,8 @@ in the
.\" .\"
documentation. Since the block sizes are always the same, it may be possible to documentation. Since the block sizes are always the same, it may be possible to
implement a customized memory handler that is more efficient than the standard implement a customized memory handler that is more efficient than the standard
function. The memory blocks obtained for this purpose are retained and re-used function. The memory blocks obtained for this purpose are retained and re-used
if possible while \fBpcre2_match()\fP is running. They are all freed just if possible while \fBpcre2_match()\fP is running. They are all freed just
before it exits. before it exits.
. .
. .

View File

@ -387,7 +387,7 @@ appear.
(*LIMIT_MATCH=d) set the match limit to d (decimal number) (*LIMIT_MATCH=d) set the match limit to d (decimal number)
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number) (*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching (*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching (*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS) (*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE) (*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*UTF) set appropriate UTF mode for the library in use (*UTF) set appropriate UTF mode for the library in use

View File

@ -433,7 +433,7 @@ about the pattern:
/I info show info about compiled pattern /I info show info about compiled pattern
hex pattern is coded in hexadecimal hex pattern is coded in hexadecimal
jit[=<number>] use JIT jit[=<number>] use JIT
jitverify verify JIT use jitverify verify JIT use
locale=<name> use this locale locale=<name> use this locale
memory show memory used memory show memory used
newline=<type> set newline type newline=<type> set newline type
@ -518,7 +518,7 @@ number in the range 0 to 7:
7 all three modes 7 all three modes
.sp .sp
If no number is given, 7 is assumed. If JIT compilation is successful, the If no number is given, 7 is assumed. If JIT compilation is successful, the
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
for the appropriate type of match, except when incompatible run-time options for the appropriate type of match, except when incompatible run-time options
are specified. For more details, see the are specified. For more details, see the
.\" HREF .\" HREF
@ -670,7 +670,7 @@ for a description of their effects.
partial_hard (or ph) set PCRE2_PARTIAL_HARD partial_hard (or ph) set PCRE2_PARTIAL_HARD
partial_soft (or ps) set PCRE2_PARTIAL_SOFT partial_soft (or ps) set PCRE2_PARTIAL_SOFT
.sp .sp
The partial matching modifiers are provided with abbreviations because they The partial matching modifiers are provided with abbreviations because they
appear frequently in tests. appear frequently in tests.
.P .P
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
@ -844,8 +844,8 @@ context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_recursion_limit()\fP
until it finds the minimum values for each parameter that allow until it finds the minimum values for each parameter that allow
\fBpcre2_match()\fP to complete without error. \fBpcre2_match()\fP to complete without error.
.P .P
If JIT is being used, only the match limit is relevant. If DFA matching is If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a being used, neither limit is relevant, and this modifier is ignored (with a
warning message). warning message).
.P .P
The \fImatch_limit\fP number is a measure of the amount of backtracking The \fImatch_limit\fP number is a measure of the amount of backtracking
@ -890,10 +890,10 @@ appears, though of course it can also be used to set a default in a
\fB#subject\fP command. It specifies the number of pairs of offsets that are \fB#subject\fP command. It specifies the number of pairs of offsets that are
available for storing matching information. The default is 15. available for storing matching information. The default is 15.
.P .P
At least one pair of offsets is always created by At least one pair of offsets is always created by
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a \fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
vector. vector.
. .
. .

View File

@ -57,7 +57,7 @@ individual code units.
In UTF modes, the dot metacharacter matches one UTF character instead of a In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit. single code unit.
.P .P
The escape sequence \eC can be used to match a single code unit, in a UTF mode, The escape sequence \eC can be used to match a single code unit, in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \eC in the characters (see the description of \eC in the
.\" HREF .\" HREF
@ -107,8 +107,8 @@ case-equivalent, and these are treated as such.
.rs .rs
.sp .sp
When the PCRE2_UTF option is set, the strings passed as patterns and subjects When the PCRE2_UTF option is set, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given. If an invalid UTF string is passed, an error return is given.
.P .P
UTF-16 and UTF-32 strings can indicate their endianness by a special code known UTF-16 and UTF-32 strings can indicate their endianness by a special code known
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting

View File

@ -82,13 +82,13 @@ for (;;)
chomp($pattern); chomp($pattern);
$pattern =~ s/\s+$//; $pattern =~ s/\s+$//;
# Split the pattern from the modifiers and adjust them as necessary. # Split the pattern from the modifiers and adjust them as necessary.
$pattern =~ /^\s*((.).*\2)(.*)$/s; $pattern =~ /^\s*((.).*\2)(.*)$/s;
$pat = $1; $pat = $1;
$mod = $3; $mod = $3;
# The private "aftertext" modifier means "print $' afterwards". # The private "aftertext" modifier means "print $' afterwards".
$showrest = ($mod =~ s/aftertext,?//); $showrest = ($mod =~ s/aftertext,?//);
@ -131,9 +131,9 @@ for (;;)
for (;;) for (;;)
{ {
last if ! ($_ = <$infile>); last if ! ($_ = <$infile>);
last if $_ =~ /^\s*$/; last if $_ =~ /^\s*$/;
} }
} }
next NEXT_RE; next NEXT_RE;
} }

View File

@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This is a freestanding support program to generate a file containing /* This is a freestanding support program to generate a file containing
character tables for PCRE2. The tables are built according to the current character tables for PCRE2. The tables are built according to the current
locale using the pcre2_maketables() function, which is part of the PCRE2 API. locale using the pcre2_maketables() function, which is part of the PCRE2 API.
*/ */
#ifdef HAVE_CONFIG_H #ifdef HAVE_CONFIG_H

View File

@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
*/ */
/* This module contains functions that scan a compiled pattern and change /* This module contains functions that scan a compiled pattern and change
repeats into possessive repeats where possible. */ repeats into possessive repeats where possible. */
@ -359,8 +359,8 @@ Returns: points to the start of the next opcode if *code is accepted
NULL if *code is not accepted NULL if *code is not accepted
*/ */
static PCRE2_SPTR static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc, get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
uint32_t *list) uint32_t *list)
{ {
PCRE2_UCHAR c = *code; PCRE2_UCHAR c = *code;
@ -387,7 +387,7 @@ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO) if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
code += IMM2_SIZE; code += IMM2_SIZE;
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT && list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
c != OP_POSPLUS); c != OP_POSPLUS);
switch(base) switch(base)
@ -595,7 +595,7 @@ for(;;)
Therefore infinite recursions are not possible. */ Therefore infinite recursions are not possible. */
c = *code; c = *code;
/* Skip over callouts */ /* Skip over callouts */
if (c == OP_CALLOUT) if (c == OP_CALLOUT)
@ -624,7 +624,7 @@ for(;;)
/* If the bracket is capturing, and referenced by an OP_RECURSE, or /* If the bracket is capturing, and referenced by an OP_RECURSE, or
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
cannot be converted to a possessive form. */ cannot be converted to a possessive form. */
if (base_list[1] == 0) return FALSE; if (base_list[1] == 0) return FALSE;
switch(*(code - GET(code, 1))) switch(*(code - GET(code, 1)))
@ -636,7 +636,7 @@ for(;;)
case OP_ONCE: case OP_ONCE:
case OP_ONCE_NC: case OP_ONCE_NC:
/* Atomic sub-patterns and assertions can always auto-possessify their /* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking last iterator. However, if the group was entered as a result of checking
a previous iterator, this is not possible. */ a previous iterator, this is not possible. */
return !entered_a_group; return !entered_a_group;
@ -672,7 +672,7 @@ for(;;)
do next_code += GET(next_code, 1); while (*next_code == OP_ALT); do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */ /* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE; next_code += 1 + LINK_SIZE;
if (!compare_opcodes(next_code, utf, cb, base_list, base_end)) if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
return FALSE; return FALSE;
@ -681,14 +681,14 @@ for(;;)
continue; continue;
default: default:
break; break;
} }
/* Check for a supported opcode, and load its properties. */ /* Check for a supported opcode, and load its properties. */
code = get_chr_property_list(code, utf, cb->fcc, list); code = get_chr_property_list(code, utf, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */ if (code == NULL) return FALSE; /* Unsupported */
/* If either opcode is a small character list, set pointers for comparing /* If either opcode is a small character list, set pointers for comparing
characters from that list with another list, or with a property. */ characters from that list with another list, or with a property. */
@ -778,7 +778,7 @@ for(;;)
/* Because the bit sets are unaligned bytes, we need to perform byte /* Because the bit sets are unaligned bytes, we need to perform byte
comparison here. */ comparison here. */
set_end = set1 + 32; set_end = set1 + 32;
if (invert_bits) if (invert_bits)
{ {
@ -922,7 +922,7 @@ for(;;)
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP]; autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
if (!accepted) return FALSE; if (!accepted) return FALSE;
if (list[1] == 0) return TRUE; if (list[1] == 0) return TRUE;
/* Might be an empty repeat. */ /* Might be an empty repeat. */
continue; continue;
@ -1093,8 +1093,8 @@ but some compilers complain about an unreachable statement. */
if appropriate. This function modifies the compiled opcode! if appropriate. This function modifies the compiled opcode!
Arguments: Arguments:
code points to start of the byte code code points to start of the byte code
utf TRUE in UTF mode utf TRUE in UTF mode
cb compile data block cb compile data block
Returns: nothing Returns: nothing
@ -1111,7 +1111,7 @@ uint32_t list[8];
for (;;) for (;;)
{ {
c = *code; c = *code;
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO) if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{ {
c -= get_repeat_base(c) - OP_STAR; c -= get_repeat_base(c) - OP_STAR;
@ -1244,7 +1244,7 @@ for (;;)
} }
/* Add in the fixed length from the table */ /* Add in the fixed length from the table */
code += PRIV(OP_lengths)[c]; code += PRIV(OP_lengths)[c];
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be

View File

@ -594,7 +594,7 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF }, { (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP }, { (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET }, { (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET }, { (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS }, { (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
@ -675,12 +675,12 @@ static const uint8_t opcode_possessify[] = {
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_code_free(pcre2_code *code) pcre2_code_free(pcre2_code *code)
{ {
if (code != NULL) if (code != NULL)
{ {
if (code->executable_jit != NULL) if (code->executable_jit != NULL)
PRIV(jit_free)(code->executable_jit, &code->memctl); PRIV(jit_free)(code->executable_jit, &code->memctl);
code->memctl.free(code, code->memctl.memory_data); code->memctl.free(code, code->memctl.memory_data);
} }
} }
@ -4462,7 +4462,7 @@ for (;; ptr++)
syntax, so we just ignore the repeat. */ syntax, so we just ignore the repeat. */
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE && if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
previous[GET(previous, 1)] != OP_ALT) previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT; goto END_REPEAT;
/* There is no sense in actually repeating assertions. The only potential /* There is no sense in actually repeating assertions. The only potential
@ -5169,64 +5169,64 @@ for (;; ptr++)
namelen = -1; /* => not a name; must set to avoid warning */ namelen = -1; /* => not a name; must set to avoid warning */
name = NULL; /* Always set to avoid warning */ name = NULL; /* Always set to avoid warning */
recno = 0; /* Always set to avoid warning */ recno = 0; /* Always set to avoid warning */
/* Point at character after (?( */ /* Point at character after (?( */
ptr++; ptr++;
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect /* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
users of PCRE2 via an application can discover which release of PCRE2 users of PCRE2 via an application can discover which release of PCRE2
is being used. */ is being used. */
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 && if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
ptr[7] != CHAR_RIGHT_PARENTHESIS) ptr[7] != CHAR_RIGHT_PARENTHESIS)
{ {
BOOL ge = FALSE; BOOL ge = FALSE;
int major = 0; int major = 0;
int minor = 0; int minor = 0;
ptr += 7; ptr += 7;
if (*ptr == CHAR_GREATER_THAN_SIGN) if (*ptr == CHAR_GREATER_THAN_SIGN)
{ {
ge = TRUE; ge = TRUE;
ptr++; ptr++;
} }
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT /* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
references its argument twice. */ references its argument twice. */
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr))) if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
{ {
*errorcodeptr = ERR79; *errorcodeptr = ERR79;
goto FAILED; goto FAILED;
} }
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0'; while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
if (*ptr == CHAR_DOT) if (*ptr == CHAR_DOT)
{ {
ptr++; ptr++;
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0'; while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
} }
if (*ptr != CHAR_RIGHT_PARENTHESIS) if (*ptr != CHAR_RIGHT_PARENTHESIS)
{ {
*errorcodeptr = ERR79; *errorcodeptr = ERR79;
goto FAILED; goto FAILED;
} }
if (ge) if (ge)
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) || code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))? (PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
OP_TRUE : OP_FALSE; OP_TRUE : OP_FALSE;
else else
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)? code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
OP_TRUE : OP_FALSE; OP_TRUE : OP_FALSE;
ptr++; ptr++;
skipbytes = 1; skipbytes = 1;
break; /* End of condition processing */ break; /* End of condition processing */
} }
/* Check for a test for recursion in a named group. */ /* Check for a test for recursion in a named group. */
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND) if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
@ -5404,8 +5404,8 @@ for (;; ptr++)
} }
/* Similarly, check for the (?(DEFINE) "condition", which is always /* Similarly, check for the (?(DEFINE) "condition", which is always
false. During compilation we set OP_DEFINE to distinguish this from false. During compilation we set OP_DEFINE to distinguish this from
other OP_FALSE conditions so that it can be checked for having only one other OP_FALSE conditions so that it can be checked for having only one
branch, but after that the opcode is changed to OP_FALSE. */ branch, but after that the opcode is changed to OP_FALSE. */
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0) else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
@ -6133,7 +6133,7 @@ for (;; ptr++)
while (*tc != OP_KET); while (*tc != OP_KET);
/* A DEFINE group is never obeyed inline (the "condition" is always /* A DEFINE group is never obeyed inline (the "condition" is always
false). It must have only one branch. Having checked this, change the false). It must have only one branch. Having checked this, change the
opcode to OP_FALSE. */ opcode to OP_FALSE. */
if (code[LINK_SIZE+1] == OP_DEFINE) if (code[LINK_SIZE+1] == OP_DEFINE)
@ -6143,7 +6143,7 @@ for (;; ptr++)
*errorcodeptr = ERR54; *errorcodeptr = ERR54;
goto FAILED; goto FAILED;
} }
code[LINK_SIZE+1] = OP_FALSE; code[LINK_SIZE+1] = OP_FALSE;
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */ bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
} }
@ -6219,7 +6219,7 @@ for (;; ptr++)
than one can replicate it as reqcu if necessary. If the subpattern has than one can replicate it as reqcu if necessary. If the subpattern has
no firstcu, set "none" for the whole branch. In both cases, a zero no firstcu, set "none" for the whole branch. In both cases, a zero
repeat forces firstcu to "none". */ repeat forces firstcu to "none". */
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET) if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
{ {
if (subfirstcuflags >= 0) if (subfirstcuflags >= 0)
@ -6759,7 +6759,7 @@ for (;;)
reqcu = firstcu; reqcu = firstcu;
reqcuflags = firstcuflags; reqcuflags = firstcuflags;
} }
} }
firstcuflags = REQ_NONE; firstcuflags = REQ_NONE;
} }
@ -7389,12 +7389,12 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
/* A NULL compile context means "use a default context" */ /* A NULL compile context means "use a default context" */
if (ccontext == NULL) if (ccontext == NULL)
ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context)); ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
/* A zero-terminated pattern is indicated by the special length value /* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero, PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
to ensure that it is always possible to look one code unit beyond the end of to ensure that it is always possible to look one code unit beyond the end of
the pattern's characters. */ the pattern's characters. */
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
@ -7481,19 +7481,19 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
case PSO_OPT: case PSO_OPT:
cb.external_options |= p->value; cb.external_options |= p->value;
break; break;
case PSO_FLG: case PSO_FLG:
setflags |= p->value; setflags |= p->value;
break; break;
case PSO_NL: case PSO_NL:
newline = p->value; newline = p->value;
setflags |= PCRE2_NL_SET; setflags |= PCRE2_NL_SET;
break; break;
case PSO_BSR: case PSO_BSR:
bsr = p->value; bsr = p->value;
setflags |= PCRE2_BSR_SET; setflags |= PCRE2_BSR_SET;
break; break;
case PSO_LIMM: case PSO_LIMM:
@ -7883,8 +7883,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
/* If the pattern is still not anchored and we do not have a first code unit, /* If the pattern is still not anchored and we do not have a first code unit,
see if there is one that is asserted (these are not saved during the compile see if there is one that is asserted (these are not saved during the compile
because they can cause conflicts with actual literals that follow). This code because they can cause conflicts with actual literals that follow). This code
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
create will not be used. */ create will not be used. */
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0) if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
@ -7930,7 +7930,7 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
} }
/* Handle the "required code unit", if one is set. In the case of an anchored /* Handle the "required code unit", if one is set. In the case of an anchored
pattern, do this only if it follows a variable length item in the pattern. pattern, do this only if it follows a variable length item in the pattern.
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */ Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
if (reqcuflags >= 0 && if (reqcuflags >= 0 &&
@ -7973,7 +7973,7 @@ while (*codestart == OP_ALT);
to set up information such as a bitmap of starting code units and a minimum to set up information such as a bitmap of starting code units and a minimum
matching length. */ matching length. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
PRIV(study)(re) != 0) PRIV(study)(re) != 0)
{ {
errorcode = ERR31; errorcode = ERR31;

View File

@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h" #include "config.h"
#endif #endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes /* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
its value gets changed by pcre2_internal.h to be in code units. */ its value gets changed by pcre2_internal.h to be in code units. */
static int configured_link_size = LINK_SIZE; static int configured_link_size = LINK_SIZE;
@ -69,7 +69,7 @@ Arguments:
Returns: 0 if data returned Returns: 0 if data returned
>= 0 if where is NULL, giving length required >= 0 if where is NULL, giving length required
PCRE2_ERROR_BADOPTION if "where" not recognized PCRE2_ERROR_BADOPTION if "where" not recognized
or JIT target requested when JIT not enabled or JIT target requested when JIT not enabled
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -80,33 +80,33 @@ if (where == NULL) /* Requests a length */
switch(what) switch(what)
{ {
default: default:
return PCRE2_ERROR_BADOPTION; return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR: case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_JIT: case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE: case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_NEWLINE: case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_STACKRECURSE: case PCRE2_CONFIG_STACKRECURSE:
case PCRE2_CONFIG_UNICODE: case PCRE2_CONFIG_UNICODE:
return sizeof(int); return sizeof(int);
case PCRE2_CONFIG_MATCHLIMIT: case PCRE2_CONFIG_MATCHLIMIT:
case PCRE2_CONFIG_PARENSLIMIT: case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_RECURSIONLIMIT: case PCRE2_CONFIG_RECURSIONLIMIT:
return sizeof(long int); return sizeof(long int);
/* These are handled below */ /* These are handled below */
case PCRE2_CONFIG_JITTARGET: case PCRE2_CONFIG_JITTARGET:
case PCRE2_CONFIG_UNICODE_VERSION: case PCRE2_CONFIG_UNICODE_VERSION:
case PCRE2_CONFIG_VERSION: case PCRE2_CONFIG_VERSION:
break; break;
} }
} }
switch (what) switch (what)
{ {
default: default:
return PCRE2_ERROR_BADOPTION; return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR: case PCRE2_CONFIG_BSR:
@ -129,9 +129,9 @@ switch (what)
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
{ {
const char *v = PRIV(jit_get_target)(); const char *v = PRIV(jit_get_target)();
return (where == NULL)? (int)strlen(v) : return (where == NULL)? (int)strlen(v) :
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
} }
#else #else
return PCRE2_ERROR_BADOPTION; return PCRE2_ERROR_BADOPTION;
#endif #endif
@ -163,9 +163,9 @@ switch (what)
*((int *)where) = 1; *((int *)where) = 1;
#endif #endif
break; break;
case PCRE2_CONFIG_UNICODE_VERSION: case PCRE2_CONFIG_UNICODE_VERSION:
{ {
#if defined SUPPORT_UNICODE #if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version); const char *v = PRIV(unicode_version);
#else #else
@ -183,15 +183,15 @@ switch (what)
*((int *)where) = 0; *((int *)where) = 0;
#endif #endif
break; break;
/* The hackery in setting "v" below is to cope with the case when /* The hackery in setting "v" below is to cope with the case when
PCRE2_PRERELEASE is set to an empty string (which it is for real releases). PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
If the second alternative is used in this case, it does not leave a space If the second alternative is used in this case, it does not leave a space
before the date. On the other hand, if all four macros are put into a single before the date. On the other hand, if all four macros are put into a single
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted. XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
There are problems using an "obvious" approach like this: There are problems using an "obvious" approach like this:
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR) XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE) XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
@ -199,18 +199,18 @@ switch (what)
argument consists of no preprocessing tokens, the behavior is undefined." It argument consists of no preprocessing tokens, the behavior is undefined." It
turns out the gcc treats this case as a single empty string - which is what turns out the gcc treats this case as a single empty string - which is what
we really want - but Visual C grumbles about the lack of an argument for the we really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. As there seems to be no macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to way to test for a macro's value being empty at compile time, we have to
resort to a runtime test. */ resort to a runtime test. */
case PCRE2_CONFIG_VERSION: case PCRE2_CONFIG_VERSION:
{ {
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)? const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) : XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE); XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
return (where == NULL)? (int)strlen(v) : return (where == NULL)? (int)strlen(v) :
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v); PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
} }
} }
return 0; return 0;

View File

@ -72,15 +72,15 @@ free(block);
* Get a block and save memory control * * Get a block and save memory control *
*************************************************/ *************************************************/
/* This internal function is called to get a block of memory in which the /* This internal function is called to get a block of memory in which the
memory control data is to be stored at the start for future use. memory control data is to be stored at the start for future use.
Arguments: Arguments:
size amount of memory required size amount of memory required
memctl pointer to a memctl block or NULL memctl pointer to a memctl block or NULL
Returns: pointer to memory or NULL on failure Returns: pointer to memory or NULL on failure
*/ */
PCRE2_EXP_DEFN void * PCRE2_EXP_DEFN void *
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl) PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
@ -88,7 +88,7 @@ PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
pcre2_memctl *newmemctl; pcre2_memctl *newmemctl;
void *yield = (memctl == NULL)? malloc(size) : void *yield = (memctl == NULL)? malloc(size) :
memctl->malloc(size, memctl->memory_data); memctl->malloc(size, memctl->memory_data);
if (yield == NULL) return NULL; if (yield == NULL) return NULL;
newmemctl = (pcre2_memctl *)yield; newmemctl = (pcre2_memctl *)yield;
if (memctl == NULL) if (memctl == NULL)
{ {
@ -96,9 +96,9 @@ if (memctl == NULL)
newmemctl->free = default_free; newmemctl->free = default_free;
newmemctl->memory_data = NULL; newmemctl->memory_data = NULL;
} }
else *newmemctl = *memctl; else *newmemctl = *memctl;
return yield; return yield;
} }
@ -108,11 +108,11 @@ return yield;
/* Initializing for compile and match contexts is done in separate, private /* Initializing for compile and match contexts is done in separate, private
functions so that these can be called from functions such as pcre2_compile() functions so that these can be called from functions such as pcre2_compile()
when an external context is not supplied. The initializing functions have an when an external context is not supplied. The initializing functions have an
option to set up default memory management. */ option to set up default memory management. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *), pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
void (*private_free)(void *, void *), void *memory_data) void (*private_free)(void *, void *), void *memory_data)
{ {
pcre2_general_context *gcontext; pcre2_general_context *gcontext;
@ -121,7 +121,7 @@ if (private_free == NULL) private_free = default_free;
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data); gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext == NULL) return NULL; if (gcontext == NULL) return NULL;
gcontext->memctl.malloc = private_malloc; gcontext->memctl.malloc = private_malloc;
gcontext->memctl.free = private_free; gcontext->memctl.free = private_free;
gcontext->memctl.memory_data = memory_data; gcontext->memctl.memory_data = memory_data;
return gcontext; return gcontext;
} }
@ -136,7 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = {
PRIV(default_tables), PRIV(default_tables),
BSR_DEFAULT, BSR_DEFAULT,
NEWLINE_DEFAULT, NEWLINE_DEFAULT,
PARENS_NEST_LIMIT }; PARENS_NEST_LIMIT };
/* The create function copies the default into the new memory, but must /* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */ override the default memory handling functions if a gcontext was provided. */
@ -145,8 +145,8 @@ PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext) pcre2_compile_context_create(pcre2_general_context *gcontext)
{ {
pcre2_compile_context *ccontext = PRIV(memctl_malloc)( pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext); sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL; if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_compile_context); *ccontext = PRIV(default_compile_context);
if (gcontext != NULL) if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext); *((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
@ -159,14 +159,14 @@ when no context is supplied to a match function. */
const pcre2_match_context PRIV(default_match_context) = { const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL }, { default_malloc, default_free, NULL },
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE
{ default_malloc, default_free, NULL }, { default_malloc, default_free, NULL },
#endif #endif
NULL, NULL,
NULL, NULL,
MATCH_LIMIT, MATCH_LIMIT,
MATCH_LIMIT_RECURSION }; MATCH_LIMIT_RECURSION };
/* The create function copies the default into the new memory, but must /* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */ override the default memory handling functions if a gcontext was provided. */
@ -174,8 +174,8 @@ PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext) pcre2_match_context_create(pcre2_general_context *gcontext)
{ {
pcre2_match_context *mcontext = PRIV(memctl_malloc)( pcre2_match_context *mcontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext); sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
if (mcontext == NULL) return NULL; if (mcontext == NULL) return NULL;
*mcontext = PRIV(default_match_context); *mcontext = PRIV(default_match_context);
if (gcontext != NULL) if (gcontext != NULL)
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext); *((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
@ -190,8 +190,8 @@ return mcontext;
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext) pcre2_general_context_copy(pcre2_general_context *gcontext)
{ {
pcre2_general_context *new = pcre2_general_context *new =
gcontext->memctl.malloc(sizeof(pcre2_real_general_context), gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
gcontext->memctl.memory_data); gcontext->memctl.memory_data);
if (new == NULL) return NULL; if (new == NULL) return NULL;
memcpy(new, gcontext, sizeof(pcre2_real_general_context)); memcpy(new, gcontext, sizeof(pcre2_real_general_context));
@ -202,8 +202,8 @@ return new;
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext) pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{ {
pcre2_compile_context *new = pcre2_compile_context *new =
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context), ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
ccontext->memctl.memory_data); ccontext->memctl.memory_data);
if (new == NULL) return NULL; if (new == NULL) return NULL;
memcpy(new, ccontext, sizeof(pcre2_real_compile_context)); memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
@ -214,8 +214,8 @@ return new;
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext) pcre2_match_context_copy(pcre2_match_context *mcontext)
{ {
pcre2_match_context *new = pcre2_match_context *new =
mcontext->memctl.malloc(sizeof(pcre2_real_match_context), mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
mcontext->memctl.memory_data); mcontext->memctl.memory_data);
if (new == NULL) return NULL; if (new == NULL) return NULL;
memcpy(new, mcontext, sizeof(pcre2_real_match_context)); memcpy(new, mcontext, sizeof(pcre2_real_match_context));
@ -267,14 +267,14 @@ data. */
/* ------------ Compile contexts ------------ */ /* ------------ Compile contexts ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext, pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables) const unsigned char *tables)
{ {
ccontext->tables = tables; ccontext->tables = tables;
return 0; return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value) pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
{ {
switch(value) switch(value)
@ -283,13 +283,13 @@ switch(value)
case PCRE2_BSR_UNICODE: case PCRE2_BSR_UNICODE:
ccontext->bsr_convention = value; ccontext->bsr_convention = value;
return 0; return 0;
default: default:
return PCRE2_ERROR_BADDATA; return PCRE2_ERROR_BADDATA;
} }
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline) pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{ {
switch(newline) switch(newline)
@ -301,10 +301,10 @@ switch(newline)
case PCRE2_NEWLINE_ANYCRLF: case PCRE2_NEWLINE_ANYCRLF:
ccontext->newline_convention = newline; ccontext->newline_convention = newline;
return 0; return 0;
default: default:
return PCRE2_ERROR_BADDATA; return PCRE2_ERROR_BADDATA;
} }
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -315,7 +315,7 @@ return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext, pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t)) int (*guard)(uint32_t))
{ {
ccontext->stack_guard = guard; ccontext->stack_guard = guard;
@ -325,8 +325,8 @@ return 0;
/* ------------ Match contexts ------------ */ /* ------------ Match contexts ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext, pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *), void *callout_data) int (*callout)(pcre2_callout_block *), void *callout_data)
{ {
mcontext->callout = callout; mcontext->callout = callout;
@ -349,8 +349,8 @@ return 0;
} }
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext, pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *), void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata) void *mydata)
{ {
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE
@ -364,6 +364,6 @@ mcontext->stack_memctl.memory_data = mydata;
(void)mydata; (void)mydata;
#endif #endif
return 0; return 0;
} }
/* End of pcre2_context.c */ /* End of pcre2_context.c */

View File

@ -376,7 +376,7 @@ stateblock *next_active_state, *next_new_state;
const uint8_t *ctypes, *lcc, *fcc; const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr; PCRE2_SPTR ptr;
PCRE2_SPTR end_code; PCRE2_SPTR end_code;
PCRE2_SPTR first_op; PCRE2_SPTR first_op;
dfa_recursion_info new_recursive; dfa_recursion_info new_recursive;
@ -542,8 +542,8 @@ for (;;)
BOOL partial_newline = FALSE; BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue; BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE; reset_could_continue = FALSE;
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr; if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
/* Make the new state list into the active state list and empty the /* Make the new state list into the active state list and empty the
new state list. */ new state list. */
@ -633,7 +633,7 @@ for (;;)
/* If this opcode inspects a character, but we are at the end of the /* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */ subject, remember the fact for use when testing for a partial match. */
if (clen == 0 && poptable[codevalue] != 0) if (clen == 0 && poptable[codevalue] != 0)
could_continue = TRUE; could_continue = TRUE;
@ -975,7 +975,7 @@ for (;;)
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); } if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
#endif #endif
mb->last_used_ptr = temp; mb->last_used_ptr = temp;
} }
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
@ -2643,7 +2643,7 @@ for (;;)
if (condcode == OP_FALSE) if (condcode == OP_FALSE)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
/* There is also an always-true condition */ /* There is also an always-true condition */
if (condcode == OP_TRUE) if (condcode == OP_TRUE)
@ -2999,7 +2999,7 @@ for (;;)
The "could_continue" variable is true if a state could have continued but The "could_continue" variable is true if a state could have continued but
for the fact that the end of the subject was reached. */ for the fact that the end of the subject was reached. */
if (new_count <= 0) if (new_count <= 0)
{ {
if (rlevel == 1 && /* Top level, and */ if (rlevel == 1 && /* Top level, and */
@ -3098,7 +3098,7 @@ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */ /* Plausibility checks */
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
return PCRE2_ERROR_NULL; return PCRE2_ERROR_NULL;
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET; if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
@ -3127,19 +3127,19 @@ with different endianness. */
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE; return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be (*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
transferred to the options for this function. The bits are guaranteed to be transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will accident this is not the case, a compile-time division by zero error will
occur. */ occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF #undef FF
@ -3168,7 +3168,7 @@ end_subject = subject + length;
req_cu_ptr = start_match - 1; req_cu_ptr = start_match - 1;
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 || anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
(re->overall_options & PCRE2_ANCHORED) != 0; (re->overall_options & PCRE2_ANCHORED) != 0;
/* The "must be at the start of a line" flags are used in a loop when finding /* The "must be at the start of a line" flags are used in a loop when finding
where to start. */ where to start. */
@ -3307,7 +3307,7 @@ for (;;)
/* There are some optimizations that avoid running the match if a known /* There are some optimizations that avoid running the match if a known
starting point is not found, or if a known later code unit is not present. starting point is not found, or if a known later code unit is not present.
However, there is an option (settable at compile time) that disables However, there is an option (settable at compile time) that disables
these, for testing and for ensuring that all callouts do actually occur. these, for testing and for ensuring that all callouts do actually occur.
The optimizations must also be avoided when restarting a DFA match. */ The optimizations must also be avoided when restarting a DFA match. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
@ -3493,7 +3493,7 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry /* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */ on only if not anchored. */
if (rc != PCRE2_ERROR_NOMATCH || anchored) if (rc != PCRE2_ERROR_NOMATCH || anchored)
{ {
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0) if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
@ -3504,7 +3504,7 @@ for (;;)
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = mb->last_used_ptr - subject; match_data->rightchar = mb->last_used_ptr - subject;
match_data->startchar = (PCRE2_SIZE)(start_match - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc; match_data->rc = rc;
return rc; return rc;
} }

View File

@ -48,7 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define STRING(a) # a #define STRING(a) # a
#define XSTRING(s) STRING(s) #define XSTRING(s) STRING(s)
/* The texts of compile-time error messages. Compile-time error numbers start /* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100). at COMPILE_ERROR_BASE (100).
Do not ever re-use any error number, because they are documented. Always add a Do not ever re-use any error number, because they are documented. Always add a
@ -101,7 +101,7 @@ static const char compile_error_texts[] =
"(?R or (?[+-]digits must be followed by )\0" "(?R or (?[+-]digits must be followed by )\0"
/* 30 */ /* 30 */
"unknown POSIX class name\0" "unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0" "internal error in pcre2_study(): should not occur\0"
"this version of PCRE does not have UTF or Unicode property support\0" "this version of PCRE does not have UTF or Unicode property support\0"
"parentheses are too deeply nested (stack check)\0" "parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0" "character code point value in \\x{} or \\o{} is too large\0"
@ -158,94 +158,94 @@ static const char compile_error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0" "character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0" "digits missing in \\x{} or \\o{}\0"
"syntax error in (?(VERSION condition\0" "syntax error in (?(VERSION condition\0"
; ;
/* Match-time and UTF error texts are in the same format. */ /* Match-time and UTF error texts are in the same format. */
static const char match_error_texts[] = static const char match_error_texts[] =
"no error\0" "no error\0"
"no match\0" "no match\0"
"partial match\0" "partial match\0"
"UTF-8 error: 1 byte missing at end\0" "UTF-8 error: 1 byte missing at end\0"
"UTF-8 error: 2 bytes missing at end\0" "UTF-8 error: 2 bytes missing at end\0"
/* 5 */ /* 5 */
"UTF-8 error: 3 bytes missing at end\0" "UTF-8 error: 3 bytes missing at end\0"
"UTF-8 error: 4 bytes missing at end\0" "UTF-8 error: 4 bytes missing at end\0"
"UTF-8 error: 5 bytes missing at end\0" "UTF-8 error: 5 bytes missing at end\0"
"UTF-8 error: byte 2 top bits not 0x80\0" "UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0" "UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */ /* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0" "UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0" "UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 6 top bits not 0x80\0" "UTF-8 error: byte 6 top bits not 0x80\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0" "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0" "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */ /* 15 */
"UTF-8 error: code points greater than 0x10ffff are not defined\0" "UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0" "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0" "UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0" "UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0" "UTF-8 error: overlong 4-byte sequence\0"
/* 20 */ /* 20 */
"UTF-8 error: overlong 5-byte sequence\0" "UTF-8 error: overlong 5-byte sequence\0"
"UTF-8 error: overlong 6-byte sequence\0" "UTF-8 error: overlong 6-byte sequence\0"
"UTF-8 error: isolated 0x80 byte\0" "UTF-8 error: isolated 0x80 byte\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0" "UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0" "UTF-16 error: missing low surrogate at end\0"
/* 25 */ /* 25 */
"UTF-16 error: invalid low surrogate\0" "UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0" "UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0" "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0" "UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad data value\0" "bad data value\0"
/* 30 */ /* 30 */
"bad length\0" "bad length\0"
"magic number missing\0" "magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0" "pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0" "bad offset value\0"
"bad option value\0" "bad option value\0"
/* 35 */ /* 35 */
"bad offset into UTF string\0" "bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */ "callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0" "invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0" "too much recursion for DFA matching\0"
"backreference condition or recursion test not supported for DFA matching\0" "backreference condition or recursion test not supported for DFA matching\0"
/* 40 */ /* 40 */
"item unsupported for DFA matching\0" "item unsupported for DFA matching\0"
"workspace size exceeded in DFA matching\0" "workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0" "internal error - pattern overwritten?\0"
"bad JIT option\0" "bad JIT option\0"
"JIT stack limit reached\0" "JIT stack limit reached\0"
/* 45 */ /* 45 */
"match limit exceeded\0" "match limit exceeded\0"
"no more memory\0" "no more memory\0"
"unknown or unset substring\0" "unknown or unset substring\0"
"NULL argument passed\0" "NULL argument passed\0"
"nested recursion at the same subject position\0" "nested recursion at the same subject position\0"
/* 50 */ /* 50 */
"recursion limit exceeded\0" "recursion limit exceeded\0"
"requested value is not set\0" "requested value is not set\0"
; ;
/************************************************* /*************************************************
* Return error message * * Return error message *
*************************************************/ *************************************************/
/* This function copies an error message into a buffer whose units are of an /* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all negative for match-time errors (except for UTF errors), but the numbers are all
distinct. distinct.
Arguments: Arguments:
enumber error number enumber error number
buffer where to put the message (zero terminated) buffer where to put the message (zero terminated)
size size of the buffer size size of the buffer
Returns: length of message if all is well Returns: length of message if all is well
negative on error negative on error
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size) pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
@ -260,23 +260,23 @@ if (size == 0) return PCRE2_ERROR_NOMEMORY;
if (enumber > COMPILE_ERROR_BASE) /* Compile error */ if (enumber > COMPILE_ERROR_BASE) /* Compile error */
{ {
message = compile_error_texts; message = compile_error_texts;
n = enumber - COMPILE_ERROR_BASE; n = enumber - COMPILE_ERROR_BASE;
} }
else /* Match or UTF error */ else /* Match or UTF error */
{ {
message = match_error_texts; message = match_error_texts;
n = -enumber; n = -enumber;
} }
for (; n > 0; n--) for (; n > 0; n--)
{ {
while (*message++ != CHAR_NULL) {}; while (*message++ != CHAR_NULL) {};
if (*message == CHAR_NULL) if (*message == CHAR_NULL)
{ {
sprintf(xbuff, "Internal error: no text for error %d", enumber); sprintf(xbuff, "Internal error: no text for error %d", enumber);
break; break;
} }
} }
for (i = 0; *message != 0; i++) for (i = 0; *message != 0; i++)
{ {
@ -287,9 +287,9 @@ for (i = 0; *message != 0; i++)
} }
buffer[i] = *message++; buffer[i] = *message++;
} }
buffer[i] = 0; buffer[i] = 0;
return i; return i;
} }
/* End of pcre2_error.c */ /* End of pcre2_error.c */

View File

@ -1553,11 +1553,11 @@ enum {
/* This is used to skip a subpattern with a {0} quantifier */ /* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 162 */ OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can /* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */ compilation finishes. */
OP_DEFINE, /* 163 */ OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode /* This is not an opcode, but is used to check that tables indexed by opcode
@ -1565,7 +1565,7 @@ enum {
some in the past. */ some in the past. */
OP_TABLE_LENGTH OP_TABLE_LENGTH
}; };
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro /* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
@ -1708,7 +1708,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, 3, /* THEN, THEN_ARG */ \ 1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \ 1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \ 1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */ 1 /* DEFINE */
/* A magic value for OP_RREF to indicate the "any recursion" condition. */ /* A magic value for OP_RREF to indicate the "any recursion" condition. */
@ -1830,9 +1830,9 @@ extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[]; extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[]; extern const uint32_t PRIV(ucp_gbtable)[];
extern const uint32_t PRIV(ucp_gentype)[]; extern const uint32_t PRIV(ucp_gentype)[];
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
extern const int PRIV(ucp_typerange)[]; extern const int PRIV(ucp_typerange)[];
#endif #endif
extern const char *PRIV(unicode_version); extern const char *PRIV(unicode_version);
extern const ucp_type_table PRIV(utt)[]; extern const ucp_type_table PRIV(utt)[];
extern const char PRIV(utt_names)[]; extern const char PRIV(utt_names)[];

View File

@ -39,16 +39,16 @@ POSSIBILITY OF SUCH DAMAGE.
*/ */
/* This module contains mode-dependent macro and structure definitions. The /* This module contains mode-dependent macro and structure definitions. The
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined. file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
These mode-dependent items are kept in a separate file so that they can also be These mode-dependent items are kept in a separate file so that they can also be
#included multiple times for different code unit widths by pcre2test in order #included multiple times for different code unit widths by pcre2test in order
to have access to the hidden structures at all supported widths. to have access to the hidden structures at all supported widths.
Some of the mode-dependent macros are required at different widths for Some of the mode-dependent macros are required at different widths for
different parts of the pcre2test code (in particular, the included different parts of the pcre2test code (in particular, the included
pcre_printint.c file). We undefine them here so that they can be re-defined for pcre_printint.c file). We undefine them here so that they can be re-defined for
multiple inclusions. Not all of these are used in pcre2test, but it's easier multiple inclusions. Not all of these are used in pcre2test, but it's easier
just to undefine them all. */ just to undefine them all. */
#undef ACROSSCHAR #undef ACROSSCHAR
@ -93,7 +93,7 @@ request for an even bigger limit. For this reason, and also to make the code
easier to maintain, the storing and loading of offsets from the compiled code easier to maintain, the storing and loading of offsets from the compiled code
unit string is now handled by the macros that are defined here. unit string is now handled by the macros that are defined here.
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
values of 3 or 4 are also supported. */ values of 3 or 4 are also supported. */
/* ------------------- 8-bit support ------------------ */ /* ------------------- 8-bit support ------------------ */
@ -173,14 +173,14 @@ values of 3 or 4 are also supported. */
#else #else
#error Unsupported compiling mode #error Unsupported compiling mode
#endif #endif
/* --------------- Other mode-specific macros ----------------- */ /* --------------- Other mode-specific macros ----------------- */
/* PCRE uses some other (at least) 16-bit quantities that do not change when /* PCRE uses some other (at least) 16-bit quantities that do not change when
the size of offsets changes. These are used for repeat counts and for other the size of offsets changes. These are used for repeat counts and for other
things such as capturing parenthesis numbers in back references. things such as capturing parenthesis numbers in back references.
Define the number of code units required to hold a 16-bit count/offset, and Define the number of code units required to hold a 16-bit count/offset, and
macros to load and store such a value. For reasons that I do not understand, macros to load and store such a value. For reasons that I do not understand,
@ -196,7 +196,7 @@ arithmetic results in a signed value. Hence the cast. */
#else /* Code units are 16 or 32 bits */ #else /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1 #define IMM2_SIZE 1
#define GET2(a,n) a[n] #define GET2(a,n) a[n]
#define PUT2(a,n,d) a[n] = d #define PUT2(a,n,d) a[n] = d
#endif #endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks /* Other macros that are different for 8-bit mode. The MAX_255 macro checks
@ -346,7 +346,7 @@ because almost all calls are already within a block of UTF-8 only code. */
/* Same as above, but it allows a fully customizable form. */ /* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \ #define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0) == 0x80) action while((condition) && ((eptr) & 0xc0) == 0x80) action
/* Deposit a character into memory, returning the number of code units. */ /* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
@ -545,10 +545,10 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */ /* ----------------------- HIDDEN STRUCTURES ----------------------------- */
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The /* NOTE: All these structures *must* start with a pcre2_memctl structure. The
code that uses them is simpler because it assumes this. */ code that uses them is simpler because it assumes this. */
/* The real general context structure. At present it holds only data for custom /* The real general context structure. At present it holds only data for custom
memory control. */ memory control. */
typedef struct pcre2_real_general_context { typedef struct pcre2_real_general_context {
@ -572,9 +572,9 @@ typedef struct pcre2_real_match_context {
pcre2_memctl memctl; pcre2_memctl memctl;
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; pcre2_memctl stack_memctl;
#endif #endif
int (*callout)(pcre2_callout_block *); int (*callout)(pcre2_callout_block *);
void *callout_data; void *callout_data;
uint32_t match_limit; uint32_t match_limit;
uint32_t recursion_limit; uint32_t recursion_limit;
} pcre2_real_match_context; } pcre2_real_match_context;
@ -584,9 +584,9 @@ typedef struct pcre2_real_match_context {
typedef struct pcre2_real_code { typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */ pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */ const uint8_t *tables; /* The character tables */
void *executable_jit; /* Pointer to JIT code */ void *executable_jit; /* Pointer to JIT code */
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */ uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
size_t blocksize; /* Total (bytes) that was malloc-ed */ size_t blocksize; /* Total (bytes) that was malloc-ed */
uint32_t magic_number; /* Paranoid and endianness check */ uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */ uint32_t overall_options; /* Options after processing the pattern */
@ -596,10 +596,10 @@ typedef struct pcre2_real_code {
uint32_t first_codeunit; /* Starting code unit */ uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */ uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */ uint16_t bsr_convention; /* What \R matches */
uint16_t newline_convention; /* What is a newline? */ uint16_t newline_convention; /* What is a newline? */
uint16_t max_lookbehind; /* Longest lookbehind (characters) */ uint16_t max_lookbehind; /* Longest lookbehind (characters) */
uint16_t minlength; /* Minimum length of match */ uint16_t minlength; /* Minimum length of match */
uint16_t top_bracket; /* Highest numbered group */ uint16_t top_bracket; /* Highest numbered group */
uint16_t top_backref; /* Highest numbered back reference */ uint16_t top_backref; /* Highest numbered back reference */
uint16_t name_entry_size; /* Size (code units) of table entries */ uint16_t name_entry_size; /* Size (code units) of table entries */
uint16_t name_count; /* Number of name entries in the table */ uint16_t name_count; /* Number of name entries in the table */
@ -614,10 +614,10 @@ typedef struct pcre2_real_match_data {
int rc; /* The return code from the match */ int rc; /* The return code from the match */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */ PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */ PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */ PCRE2_SIZE startchar; /* Offset to starting code unit */
PCRE2_SPTR mark; /* Pointer to last mark */ PCRE2_SPTR mark; /* Pointer to last mark */
uint16_t oveccount; /* Number of pairs */ uint16_t oveccount; /* Number of pairs */
PCRE2_SIZE ovector[1]; /* The first field */ PCRE2_SIZE ovector[1]; /* The first field */
} pcre2_real_match_data; } pcre2_real_match_data;
@ -700,7 +700,7 @@ the system stack. */
typedef struct ovecsave_frame { typedef struct ovecsave_frame {
struct ovecsave_frame *next; /* Next frame on free chain */ struct ovecsave_frame *next; /* Next frame on free chain */
PCRE2_SIZE saved_ovec[1]; /* First vector element */ PCRE2_SIZE saved_ovec[1]; /* First vector element */
} ovecsave_frame; } ovecsave_frame;
/* Structure for items in a linked list that represents an explicit recursive /* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_match(). */ call within the pattern; used by pcre_match(). */
@ -738,7 +738,7 @@ typedef struct match_block {
pcre2_memctl memctl; /* For general use */ pcre2_memctl memctl; /* For general use */
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* For "stack" frames */ pcre2_memctl stack_memctl; /* For "stack" frames */
#endif #endif
uint32_t match_call_count; /* As it says */ uint32_t match_call_count; /* As it says */
uint32_t match_limit; /* As it says */ uint32_t match_limit; /* As it says */
uint32_t match_limit_recursion; /* As it says */ uint32_t match_limit_recursion; /* As it says */
@ -763,7 +763,7 @@ typedef struct match_block {
PCRE2_SPTR start_match_ptr; /* Start of matched string */ PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */ PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */ PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */ PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */ PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */ PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
@ -778,7 +778,7 @@ typedef struct match_block {
PCRE2_UCHAR nl[4]; /* Newline string when fixed */ PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */ eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */ recursion_info *recursive; /* Linked list of recursion data */
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */ ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
void *callout_data; /* To pass back to callouts */ void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */ int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef HEAP_MATCH_RECURSE #ifdef HEAP_MATCH_RECURSE
@ -795,7 +795,7 @@ typedef struct dfa_match_block {
PCRE2_SPTR start_subject ; /* Start of the subject string */ PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */ PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */ PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */ PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */ const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */ PCRE2_SIZE start_offset; /* The start offset value */
uint32_t moptions; /* Match options */ uint32_t moptions; /* Match options */

View File

@ -72,9 +72,9 @@ Arguments:
length length of subject string (may contain binary zeros) length length of subject string (may contain binary zeros)
start_offset where to start in the subject string start_offset where to start in the subject string
options option bits options option bits
match_data points to a match_data block match_data points to a match_data block
mcontext points to a match context mcontext points to a match context
jit_stack points to a JIT stack jit_stack points to a JIT stack
Returns: > 0 => success; value is the number of ovector pairs filled Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough = 0 => success, but ovector is not big enough

View File

@ -60,9 +60,9 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
/* This function builds a set of character tables for use by PCRE2 and returns /* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are built using the ctype functions, and consequently a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if part of the library, the store is obtained via a general context malloc, if
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
program) malloc() is used, and the function has a different name so as not to program) malloc() is used, and the function has a different name so as not to
clash with the prototype in pcre2.h. clash with the prototype in pcre2.h.
Arguments: none when DFTABLES is defined Arguments: none when DFTABLES is defined

View File

@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ (PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT) PCRE2_PARTIAL_SOFT)
#define PUBLIC_JIT_MATCH_OPTIONS \ #define PUBLIC_JIT_MATCH_OPTIONS \
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD) PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
@ -125,24 +125,24 @@ ovector length is always a multiple of 3. */
/* This function is called only when it is known that the offset lies within /* This function is called only when it is known that the offset lies within
the offsets that have so far been used in the match. Note that in caseless the offsets that have so far been used in the match. Note that in caseless
UTF-8 mode, the number of subject bytes matched may be different to the number UTF-8 mode, the number of subject bytes matched may be different to the number
of reference bytes. (In theory this could also happen in UTF-16 mode, but it of reference bytes. (In theory this could also happen in UTF-16 mode, but it
seems unlikely.) seems unlikely.)
Arguments: Arguments:
offset index into the offset vector offset index into the offset vector
offset_top top of the used offset vector offset_top top of the used offset vector
eptr pointer into the subject eptr pointer into the subject
mb points to match block mb points to match block
caseless TRUE if caseless caseless TRUE if caseless
lengthptr pointer for returning the length matched lengthptr pointer for returning the length matched
Returns: = 0 successful match; number of code units matched is set Returns: = 0 successful match; number of code units matched is set
< 0 no match < 0 no match
> 0 partial match > 0 partial match
*/ */
static int static int
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr, match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr) match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
{ {
#if defined SUPPORT_UNICODE #if defined SUPPORT_UNICODE
@ -153,7 +153,7 @@ register PCRE2_SPTR p;
PCRE2_SIZE length; PCRE2_SIZE length;
PCRE2_SPTR eptr_start = eptr; PCRE2_SPTR eptr_start = eptr;
/* Deal with an unset group. The default is no match, but there is an option to /* Deal with an unset group. The default is no match, but there is an option to
match an empty string. */ match an empty string. */
if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET) if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
@ -164,7 +164,7 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
return 0; /* Match */ return 0; /* Match */
} }
else return -1; /* No match */ else return -1; /* No match */
} }
/* Separate the caseless and UTF cases for speed. */ /* Separate the caseless and UTF cases for speed. */
@ -217,7 +217,7 @@ if (caseless)
if (eptr >= mb->end_subject) return 1; /* Partial match */ if (eptr >= mb->end_subject) return 1; /* Partial match */
cc = UCHAR21TEST(eptr); cc = UCHAR21TEST(eptr);
cp = UCHAR21TEST(p); cp = UCHAR21TEST(p);
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc)) if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
return -1; /* No match */ return -1; /* No match */
p++; p++;
eptr++; eptr++;
@ -345,7 +345,7 @@ argument of match(), which never changes. */
} }
/* Structure for remembering the local variables in a private frame. Arrange it /* Structure for remembering the local variables in a private frame. Arrange it
so as to minimize the number of holes. */ so as to minimize the number of holes. */
typedef struct heapframe { typedef struct heapframe {
@ -364,7 +364,7 @@ typedef struct heapframe {
PCRE2_SPTR Xpp; PCRE2_SPTR Xpp;
PCRE2_SPTR Xprev; PCRE2_SPTR Xprev;
PCRE2_SPTR Xsaved_eptr; PCRE2_SPTR Xsaved_eptr;
eptrblock *Xeptrb; eptrblock *Xeptrb;
PCRE2_SIZE Xlength; PCRE2_SIZE Xlength;
@ -377,7 +377,7 @@ typedef struct heapframe {
uint32_t Xrdepth; uint32_t Xrdepth;
uint32_t Xop; uint32_t Xop;
uint32_t Xsave_capture_last; uint32_t Xsave_capture_last;
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
uint32_t Xprop_value; uint32_t Xprop_value;
int Xprop_type; int Xprop_type;
@ -401,7 +401,7 @@ typedef struct heapframe {
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
PCRE2_UCHAR Xocchars[6]; PCRE2_UCHAR Xocchars[6];
#endif #endif
} heapframe; } heapframe;
#endif #endif
@ -414,9 +414,9 @@ typedef struct heapframe {
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements /* When HEAP_MATCH_RECURSE is not defined, the match() function implements
backtrack points by calling itself recursively in all but one case. The one backtrack points by calling itself recursively in all but one case. The one
special case is when processing OP_RECURSE, which specifies recursion in the special case is when processing OP_RECURSE, which specifies recursion in the
pattern. The entire ovector must be saved and restored while processing pattern. The entire ovector must be saved and restored while processing
OP_RECURSE. If the ovector is small enough, instead of calling match() OP_RECURSE. If the ovector is small enough, instead of calling match()
directly, op_recurse_ovecsave() is called. This function uses the system stack directly, op_recurse_ovecsave() is called. This function uses the system stack
to save the ovector while calling match() to process the pattern recursion. */ to save the ovector while calling match() to process the pattern recursion. */
#ifndef HEAP_MATCH_RECURSE #ifndef HEAP_MATCH_RECURSE
@ -425,7 +425,7 @@ to save the ovector while calling match() to process the pattern recursion. */
op_recurse_ovecsave(). */ op_recurse_ovecsave(). */
static int static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth); PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
@ -433,7 +433,7 @@ match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
* Process OP_RECURSE, stacking ovector * * Process OP_RECURSE, stacking ovector *
*************************************************/ *************************************************/
/* When this function is called, mb->recursive has already been updated to /* When this function is called, mb->recursive has already been updated to
point to a new recursion data block, and all its fields other than ovec_save point to a new recursion data block, and all its fields other than ovec_save
have been set. have been set.
@ -447,9 +447,9 @@ Arguments:
eptrb pointer to chain of blocks containing eptr at start of eptrb pointer to chain of blocks containing eptr at start of
brackets - for testing for empty matches brackets - for testing for empty matches
rdepth the recursion depth rdepth the recursion depth
Returns: a match() return code Returns: a match() return code
*/ */
static int static int
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat, op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
@ -472,7 +472,7 @@ data and the last captured value. */
do do
{ {
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP; if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top, rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
mb, eptrb, rdepth + 1); mb, eptrb, rdepth + 1);
memcpy(mb->ovector, new_recursive->ovec_save, memcpy(mb->ovector, new_recursive->ovec_save,
mb->offset_end * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
@ -560,7 +560,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/ */
static int static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart, match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth) PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
{ {
/* These variables do not need to be preserved over recursion in this function, /* These variables do not need to be preserved over recursion in this function,
@ -1382,10 +1382,10 @@ for (;;)
case OP_FALSE: case OP_FALSE:
break; break;
case OP_TRUE: case OP_TRUE:
condition = TRUE; condition = TRUE;
break; break;
/* The condition is an assertion. Call match() to evaluate it - setting /* The condition is an assertion. Call match() to evaluate it - setting
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
@ -1475,7 +1475,7 @@ for (;;)
update the last used pointer. */ update the last used pointer. */
case OP_ASSERT_ACCEPT: case OP_ASSERT_ACCEPT:
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
case OP_ACCEPT: case OP_ACCEPT:
case OP_END: case OP_END:
@ -1735,7 +1735,7 @@ for (;;)
case OP_RECURSE: case OP_RECURSE:
{ {
ovecsave_frame *fr; ovecsave_frame *fr;
recursion_info *ri; recursion_info *ri;
uint32_t recno; uint32_t recno;
@ -1762,15 +1762,15 @@ for (;;)
ecode += 1 + LINK_SIZE; ecode += 1 + LINK_SIZE;
/* When we are using the system stack for match() recursion we can call a /* When we are using the system stack for match() recursion we can call a
function that uses the system stack for preserving the ovector while function that uses the system stack for preserving the ovector while
processing the pattern recursion, but only if the ovector is small processing the pattern recursion, but only if the ovector is small
enough. */ enough. */
#ifndef HEAP_MATCH_RECURSE #ifndef HEAP_MATCH_RECURSE
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX) if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
{ {
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb, rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
eptrb, rdepth); eptrb, rdepth);
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc); if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
@ -1785,10 +1785,10 @@ for (;;)
} }
#endif #endif
/* If the ovector is too big, or if we are using the heap for match() /* If the ovector is too big, or if we are using the heap for match()
recursion, we have to use the heap for saving the ovector. Used ovecsave recursion, we have to use the heap for saving the ovector. Used ovecsave
frames are kept on a chain and re-used. This makes a small improvement in frames are kept on a chain and re-used. This makes a small improvement in
execution time on Linux. */ execution time on Linux. */
if (mb->ovecsave_chain != NULL) if (mb->ovecsave_chain != NULL)
{ {
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec; new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
@ -1800,17 +1800,17 @@ for (;;)
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data)); mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY); if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
new_recursive.ovec_save = fr->saved_ovec; new_recursive.ovec_save = fr->saved_ovec;
} }
memcpy(new_recursive.ovec_save, mb->ovector, memcpy(new_recursive.ovec_save, mb->ovector,
mb->offset_end * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the /* Do the recursion. After processing each alternative, restore the
ovector data and the last captured value. This code has the same overall ovector data and the last captured value. This code has the same overall
logic as the code in the op_recurse_ovecsave() function, but is adapted logic as the code in the op_recurse_ovecsave() function, but is adapted
to use RMATCH/RRETURN and to release the heap block containing the saved to use RMATCH/RRETURN and to release the heap block containing the saved
ovector. */ ovector. */
cbegroup = (*callpat >= OP_SBRA); cbegroup = (*callpat >= OP_SBRA);
do do
{ {
@ -1821,51 +1821,51 @@ for (;;)
mb->offset_end * sizeof(PCRE2_SIZE)); mb->offset_end * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive.saved_capture_last; mb->capture_last = new_recursive.saved_capture_last;
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{ {
fr = (ovecsave_frame *) fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *)); ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain; fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr; mb->ovecsave_chain = fr;
/* Set where we got to in the subject, and reset the start, in case /* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion, it was changed by \K. This *is* propagated back out of a recursion,
for Perl compatibility. */ for Perl compatibility. */
eptr = mb->end_match_ptr; eptr = mb->end_match_ptr;
mstart = mb->start_match_ptr; mstart = mb->start_match_ptr;
goto RECURSION_MATCHED; /* Exit loop; end processing */ goto RECURSION_MATCHED; /* Exit loop; end processing */
} }
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
recursion; they cause a NOMATCH for the entire recursion. These codes recursion; they cause a NOMATCH for the entire recursion. These codes
are defined in a range that can be tested for. */ are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX) if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
{ {
rrc = MATCH_NOMATCH; rrc = MATCH_NOMATCH;
goto RECURSION_RETURN; goto RECURSION_RETURN;
} }
/* Any return code other than NOMATCH is an error. */ /* Any return code other than NOMATCH is an error. */
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN; if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
mb->recursive = &new_recursive; mb->recursive = &new_recursive;
callpat += GET(callpat, 1); callpat += GET(callpat, 1);
} }
while (*callpat == OP_ALT); while (*callpat == OP_ALT);
RECURSION_RETURN: RECURSION_RETURN:
mb->recursive = new_recursive.prevrec; mb->recursive = new_recursive.prevrec;
fr = (ovecsave_frame *) fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *)); ((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain; fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr; mb->ovecsave_chain = fr;
RRETURN(rrc); RRETURN(rrc);
} }
RECURSION_MATCHED: RECURSION_MATCHED:
break; break;
/* An alternation is the end of a branch; scan along to find the end of the /* An alternation is the end of a branch; scan along to find the end of the
@ -1942,7 +1942,7 @@ for (;;)
mb->end_match_ptr = eptr; /* For ONCE_NC */ mb->end_match_ptr = eptr; /* For ONCE_NC */
mb->end_offset_top = offset_top; mb->end_offset_top = offset_top;
mb->start_match_ptr = mstart; mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH); /* Sets mb->mark */ RRETURN(MATCH_MATCH); /* Sets mb->mark */
} }
@ -1966,7 +1966,7 @@ for (;;)
{ {
mb->end_match_ptr = eptr; mb->end_match_ptr = eptr;
mb->start_match_ptr = mstart; mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH); RRETURN(MATCH_MATCH);
} }
@ -2010,7 +2010,7 @@ for (;;)
mb->start_match_ptr = mstart; /* In case \K reset it */ mb->start_match_ptr = mstart; /* In case \K reset it */
mb->end_match_ptr = eptr; mb->end_match_ptr = eptr;
mb->end_offset_top = offset_top; mb->end_offset_top = offset_top;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr; if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_KETRPOS); RRETURN(MATCH_KETRPOS);
} }
@ -2230,8 +2230,8 @@ for (;;)
else else
{ {
PCRE2_SPTR nextptr = eptr + 1; PCRE2_SPTR nextptr = eptr + 1;
FORWARDCHARTEST(nextptr, mb->end_subject); FORWARDCHARTEST(nextptr, mb->end_subject);
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
GETCHAR(c, eptr); GETCHAR(c, eptr);
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
@ -2282,7 +2282,7 @@ for (;;)
} }
else else
{ {
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1; if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0) if ((mb->poptions & PCRE2_UCP) != 0)
{ {
@ -2297,7 +2297,7 @@ for (;;)
#endif #endif
cur_is_word = MAX_255(*eptr) cur_is_word = MAX_255(*eptr)
&& ((mb->ctypes[*eptr] & ctype_word) != 0); && ((mb->ctypes[*eptr] & ctype_word) != 0);
} }
} }
/* Now see if the situation is what we want */ /* Now see if the situation is what we want */
@ -2689,7 +2689,7 @@ for (;;)
/* Match a back reference, possibly repeatedly. Look past the end of the /* Match a back reference, possibly repeatedly. Look past the end of the
item to see if there is repeat information following. item to see if there is repeat information following.
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
or to a non-duplicated named group. For a duplicated named group, OP_DNREF or to a non-duplicated named group. For a duplicated named group, OP_DNREF
and OP_DNREFI are used. In this case we must scan the list of groups to and OP_DNREFI are used. In this case we must scan the list of groups to
@ -2705,7 +2705,7 @@ for (;;)
/* Initializing 'offset' avoids a compiler warning in the REF_REPEAT /* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
code. */ code. */
offset = 0; offset = 0;
while (count-- > 0) while (count-- > 0)
{ {
@ -2721,7 +2721,7 @@ for (;;)
caseless = op == OP_REFI; caseless = op == OP_REFI;
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ offset = GET2(ecode, 1) << 1; /* Doubled ref number */
ecode += 1 + IMM2_SIZE; ecode += 1 + IMM2_SIZE;
/* Set up for repetition, or handle the non-repeated case */ /* Set up for repetition, or handle the non-repeated case */
REF_REPEAT: REF_REPEAT:
@ -2750,7 +2750,7 @@ for (;;)
break; break;
default: /* No repeat follows */ default: /* No repeat follows */
{ {
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length); int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
if (rc != 0) if (rc != 0)
{ {
@ -2758,7 +2758,7 @@ for (;;)
CHECK_PARTIAL(); CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
} }
eptr += length; eptr += length;
continue; /* With the main loop */ continue; /* With the main loop */
} }
@ -2769,16 +2769,16 @@ for (;;)
also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes unset
group behave as a zero-length group. For any other unset cases, carrying group behave as a zero-length group. For any other unset cases, carrying
on will result in NOMATCH. */ on will result in NOMATCH. */
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET) if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
{ {
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue; if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
} }
else /* Group is not set */ else /* Group is not set */
{ {
if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0) if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
continue; continue;
} }
/* First, ensure the minimum number of matches are present. We get back /* First, ensure the minimum number of matches are present. We get back
the length of the reference string explicitly rather than passing the the length of the reference string explicitly rather than passing the
@ -2787,7 +2787,7 @@ for (;;)
for (i = 1; i <= min; i++) for (i = 1; i <= min; i++)
{ {
PCRE2_SIZE slength; PCRE2_SIZE slength;
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0) if (rc != 0)
{ {
if (rc > 0) eptr = mb->end_subject; /* Partial match */ if (rc > 0) eptr = mb->end_subject; /* Partial match */
@ -2808,13 +2808,13 @@ for (;;)
{ {
for (fi = min;; fi++) for (fi = min;; fi++)
{ {
int rc; int rc;
PCRE2_SIZE slength; PCRE2_SIZE slength;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH); if (fi >= max) RRETURN(MATCH_NOMATCH);
rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0) if (rc != 0)
{ {
if (rc > 0) eptr = mb->end_subject; /* Partial match */ if (rc > 0) eptr = mb->end_subject; /* Partial match */
CHECK_PARTIAL(); CHECK_PARTIAL();
@ -2825,12 +2825,12 @@ for (;;)
/* Control never gets here */ /* Control never gets here */
} }
/* If maximizing, find the longest string and work backwards, as long as /* If maximizing, find the longest string and work backwards, as long as
the matched lengths for each iteration are the same. */ the matched lengths for each iteration are the same. */
else else
{ {
BOOL samelengths = TRUE; BOOL samelengths = TRUE;
pp = eptr; pp = eptr;
length = mb->ovector[offset+1] - mb->ovector[offset]; length = mb->ovector[offset+1] - mb->ovector[offset];
@ -2839,7 +2839,7 @@ for (;;)
PCRE2_SIZE slength; PCRE2_SIZE slength;
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength); int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0) if (rc != 0)
{ {
/* Can't use CHECK_PARTIAL because we don't want to update eptr in /* Can't use CHECK_PARTIAL because we don't want to update eptr in
the soft partial matching case. */ the soft partial matching case. */
@ -2857,14 +2857,14 @@ for (;;)
eptr += slength; eptr += slength;
} }
/* If the length matched for each repetition is the same as the length of /* If the length matched for each repetition is the same as the length of
the captured group, we can easily work backwards. This is the normal the captured group, we can easily work backwards. This is the normal
case. However, in caseless UTF-8 mode there are pairs of case-equivalent case. However, in caseless UTF-8 mode there are pairs of case-equivalent
characters whose lengths (in terms of code units) differ. However, this characters whose lengths (in terms of code units) differ. However, this
is very rare, so we handle it by re-matching fewer and fewer times. */ is very rare, so we handle it by re-matching fewer and fewer times. */
if (samelengths) if (samelengths)
{ {
while (eptr >= pp) while (eptr >= pp)
{ {
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
@ -2872,20 +2872,20 @@ for (;;)
eptr -= length; eptr -= length;
} }
} }
/* The rare case of non-matching lengths. Re-scan the repetition for each /* The rare case of non-matching lengths. Re-scan the repetition for each
iteration. We know that match_ref() will succeed every time. */ iteration. We know that match_ref() will succeed every time. */
else else
{ {
max = i; max = i;
for (;;) for (;;)
{ {
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68); RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr == pp) break; /* Failed after minimal repetition */ if (eptr == pp) break; /* Failed after minimal repetition */
eptr = pp; eptr = pp;
max--; max--;
for (i = min; i < max; i++) for (i = min; i < max; i++)
{ {
PCRE2_SIZE slength; PCRE2_SIZE slength;
@ -2893,8 +2893,8 @@ for (;;)
eptr += slength; eptr += slength;
} }
} }
} }
RRETURN(MATCH_NOMATCH); RRETURN(MATCH_NOMATCH);
} }
/* Control never gets here */ /* Control never gets here */
@ -6417,20 +6417,20 @@ with different endianness. */
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE; return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be (*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field, which can now be
transferred to the options for this function. The bits are guaranteed to be transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will accident this is not the case, a compile-time division by zero error will
occur. */ occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET) #define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART) #define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO)); options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF #undef FF
@ -6541,7 +6541,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match; mcontext->match_limit : re->limit_match;
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)? mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
mcontext->recursion_limit : re->limit_recursion; mcontext->recursion_limit : re->limit_recursion;
/* Pointers to the individual character tables */ /* Pointers to the individual character tables */
mb->lcc = re->tables + lcc_offset; mb->lcc = re->tables + lcc_offset;
@ -6580,7 +6580,7 @@ switch(re->newline_convention)
default: return PCRE2_ERROR_INTERNAL; default: return PCRE2_ERROR_INTERNAL;
} }
/* If the expression has got more back references than the offsets supplied can /* If the expression has got more back references than the offsets supplied can
hold, we get a temporary chunk of memory to use during the matching. Otherwise, hold, we get a temporary chunk of memory to use during the matching. Otherwise,
we can use the vector supplied. The size of the ovector is three times the we can use the vector supplied. The size of the ovector is three times the
@ -6854,7 +6854,7 @@ for(;;)
mb->start_match_ptr = start_match; mb->start_match_ptr = start_match;
mb->start_used_ptr = start_match; mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match; mb->last_used_ptr = start_match;
mb->match_call_count = 0; mb->match_call_count = 0;
mb->match_function_type = 0; mb->match_function_type = 0;
mb->end_offset_top = 0; mb->end_offset_top = 0;
@ -6990,7 +6990,7 @@ while (mb->ovecsave_chain != NULL)
ovecsave_frame *this = mb->ovecsave_chain; ovecsave_frame *this = mb->ovecsave_chain;
mb->ovecsave_chain = this->next; mb->ovecsave_chain = this->next;
mb->memctl.free(this, mb->memctl.memory_data); mb->memctl.free(this, mb->memctl.memory_data);
} }
/* Fill in fields that are always returned in the match data. */ /* Fill in fields that are always returned in the match data. */
@ -7057,9 +7057,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject; match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject; match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
} }
/* Set the remaining returned values */ /* Set the remaining returned values */
match_data->startchar = start_match - subject; match_data->startchar = start_match - subject;
match_data->leftchar = mb->start_used_ptr - subject; match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
@ -7068,7 +7068,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
} }
/* Control gets here if there has been a partial match, an error, or if the /* Control gets here if there has been a partial match, an error, or if the
overall match attempt has failed at all permitted starting positions. Any mark overall match attempt has failed at all permitted starting positions. Any mark
data is in the nomatch_mark field. */ data is in the nomatch_mark field. */
match_data->mark = mb->nomatch_mark; match_data->mark = mb->nomatch_mark;

View File

@ -72,10 +72,10 @@ return yield;
*************************************************/ *************************************************/
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(pcre2_code *code, pcre2_match_data_create_from_pattern(pcre2_code *code,
pcre2_general_context *gcontext) pcre2_general_context *gcontext)
{ {
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1, return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
gcontext); gcontext);
} }
@ -88,7 +88,7 @@ return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data) pcre2_match_data_free(pcre2_match_data *match_data)
{ {
if (match_data != NULL) if (match_data != NULL)
match_data->memctl.free(match_data, match_data->memctl.memory_data); match_data->memctl.free(match_data, match_data->memctl.memory_data);
} }

View File

@ -60,9 +60,9 @@ http://unicode.org/unicode/reports/tr18/. */
* Check for newline at given position * * Check for newline at given position *
*************************************************/ *************************************************/
/* This function is called only via the IS_NEWLINE macro, which does so only /* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string. pointed to by ptr is less than the end of the string.
Arguments: Arguments:
@ -76,7 +76,7 @@ Returns: TRUE or FALSE
*/ */
BOOL BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr, PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
uint32_t *lenptr, BOOL utf) uint32_t *lenptr, BOOL utf)
{ {
uint32_t c; uint32_t c;
@ -90,15 +90,15 @@ c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c) if (type == NLTYPE_ANYCRLF) switch(c)
{ {
case CHAR_LF: case CHAR_LF:
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
case CHAR_CR: case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1; *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE; return TRUE;
default: default:
return FALSE; return FALSE;
} }
@ -111,8 +111,8 @@ else switch(c)
#endif #endif
case CHAR_LF: case CHAR_LF:
case CHAR_VT: case CHAR_VT:
case CHAR_FF: case CHAR_FF:
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
case CHAR_CR: case CHAR_CR:
@ -121,25 +121,25 @@ else switch(c)
#ifndef EBCDIC #ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: case CHAR_NEL:
*lenptr = utf? 2 : 1; *lenptr = utf? 2 : 1;
return TRUE; return TRUE;
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: /* PS */ case 0x2029: /* PS */
*lenptr = 3; *lenptr = 3;
return TRUE; return TRUE;
#else /* 16-bit or 32-bit code units */ #else /* 16-bit or 32-bit code units */
case CHAR_NEL: case CHAR_NEL:
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: /* PS */ case 0x2029: /* PS */
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
#endif #endif
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
default: default:
return FALSE; return FALSE;
} }
} }
@ -166,7 +166,7 @@ Returns: TRUE or FALSE
*/ */
BOOL BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr, PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t *lenptr, BOOL utf) uint32_t *lenptr, BOOL utf)
{ {
uint32_t c; uint32_t c;
@ -190,11 +190,11 @@ if (type == NLTYPE_ANYCRLF) switch(c)
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1; *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE; return TRUE;
case CHAR_CR: case CHAR_CR:
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
default: default:
return FALSE; return FALSE;
} }
@ -211,31 +211,31 @@ else switch(c)
#endif #endif
case CHAR_VT: case CHAR_VT:
case CHAR_FF: case CHAR_FF:
case CHAR_CR: case CHAR_CR:
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
#ifndef EBCDIC #ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8 #if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL: case CHAR_NEL:
*lenptr = utf? 2 : 1; *lenptr = utf? 2 : 1;
return TRUE; return TRUE;
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: /* PS */ case 0x2029: /* PS */
*lenptr = 3; *lenptr = 3;
return TRUE; return TRUE;
#else /* 16-bit or 32-bit code units */ #else /* 16-bit or 32-bit code units */
case CHAR_NEL: case CHAR_NEL:
case 0x2028: /* LS */ case 0x2028: /* LS */
case 0x2029: /* PS */ case 0x2029: /* PS */
*lenptr = 1; *lenptr = 1;
return TRUE; return TRUE;
#endif #endif
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
default: default:
return FALSE; return FALSE;
} }
} }

View File

@ -89,17 +89,17 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_NAMECOUNT: case PCRE2_INFO_NAMECOUNT:
case PCRE2_INFO_NEWLINE: case PCRE2_INFO_NEWLINE:
case PCRE2_INFO_RECURSIONLIMIT: case PCRE2_INFO_RECURSIONLIMIT:
return sizeof(uint32_t); return sizeof(uint32_t);
case PCRE2_INFO_FIRSTBITMAP: case PCRE2_INFO_FIRSTBITMAP:
return sizeof(const uint8_t *); return sizeof(const uint8_t *);
case PCRE2_INFO_JITSIZE: case PCRE2_INFO_JITSIZE:
case PCRE2_INFO_SIZE: case PCRE2_INFO_SIZE:
return sizeof(size_t); return sizeof(size_t);
case PCRE2_INFO_NAMETABLE: case PCRE2_INFO_NAMETABLE:
return sizeof(PCRE2_SPTR); return sizeof(PCRE2_SPTR);
} }
} }

View File

@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains a PCRE private debugging function for printing out the /* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. */ that comprise the library. */
@ -82,9 +82,9 @@ Arguments:
f file to write to f file to write to
ptr pointer to first code unit of the character ptr pointer to first code unit of the character
utf TRUE if string is UTF (will be FALSE if UTF is not supported) utf TRUE if string is UTF (will be FALSE if UTF is not supported)
Returns: number of additional code units used Returns: number of additional code units used
*/ */
static unsigned int static unsigned int
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf) print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
@ -105,7 +105,7 @@ if (utf)
one_code_unit = (c & 0xfffff800u) != 0xd800u; one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */ #endif /* CODE_UNIT_WIDTH */
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
/* Handle a valid one-code-unit character at any width. */ /* Handle a valid one-code-unit character at any width. */
@ -115,10 +115,10 @@ if (one_code_unit)
else if (c < 0x80) fprintf(f, "\\x%02x", c); else if (c < 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c); else fprintf(f, "\\x{%02x}", c);
return 0; return 0;
} }
/* Code for invalid UTF code units and multi-unit UTF characters is different /* Code for invalid UTF code units and multi-unit UTF characters is different
for each width. If UTF is not supported, control should never get here, but we for each width. If UTF is not supported, control should never get here, but we
need a return statement to keep the compiler happy. */ need a return statement to keep the compiler happy. */
#ifndef SUPPORT_UNICODE #ifndef SUPPORT_UNICODE
@ -134,10 +134,10 @@ if ((c & 0xc0) != 0xc0)
{ {
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */ fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
return 0; return 0;
} }
else else
{ {
int i; int i;
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
int s = 6*a; int s = 6*a;
c = (c & utf8_table3[a]) << s; c = (c & utf8_table3[a]) << s;
@ -153,7 +153,7 @@ else
} }
fprintf(f, "\\x{%x}", c); fprintf(f, "\\x{%x}", c);
return a; return a;
} }
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one. /* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
@ -173,7 +173,7 @@ return 1;
/* For UTF-32 we get here only for a malformed code unit, which should only /* For UTF-32 we get here only for a malformed code unit, which should only
occur if the sanity check has been turned off. Print it with \X instead of \x occur if the sanity check has been turned off. Print it with \X instead of \x
as an indication. */ as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 32
fprintf(f, "\\X{%x}", c); fprintf(f, "\\X{%x}", c);
return 0; return 0;
@ -187,15 +187,15 @@ return 0;
* Print string as a list of code units * * Print string as a list of code units *
*************************************************/ *************************************************/
/* This takes no account of UTF as it always prints each individual code unit. /* This takes no account of UTF as it always prints each individual code unit.
The string is zero-terminated. The string is zero-terminated.
Arguments: Arguments:
f file to write to f file to write to
ptr point to the string ptr point to the string
Returns: nothing Returns: nothing
*/ */
static void static void
print_custring(FILE *f, PCRE2_SPTR ptr) print_custring(FILE *f, PCRE2_SPTR ptr)
@ -213,9 +213,9 @@ while (*ptr != '\0')
* Find Unicode property name * * Find Unicode property name *
*************************************************/ *************************************************/
/* When there is no UTF/UCP support, the table of names does not exist. This /* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */ into the main code, however, we just put one into this function. */
static const char * static const char *
@ -244,15 +244,15 @@ return "??";
/* "Normal" properties can be printed from tables. The PT_CLIST property is a /* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent pseudo-property that contains a pointer to a list of case-equivalent
characters. characters.
Arguments: Arguments:
f file to write to f file to write to
code pointer in the compiled code code pointer in the compiled code
before text to print before before text to print before
after text to print after after text to print after
Returns: nothing Returns: nothing
*/ */
static void static void
@ -281,14 +281,14 @@ else
/* The print_lengths flag controls whether offsets and lengths of items are /* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE. bytecode can be written that do not depend on the value of LINK_SIZE.
Arguments: Arguments:
re a compiled pattern re a compiled pattern
f the file to write to f the file to write to
print_lengths show various lengths print_lengths show various lengths
Returns: nothing Returns: nothing
*/ */
static void static void
@ -460,7 +460,7 @@ for(;;)
case OP_TYPEMINQUERY: case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY: case OP_TYPEPOSQUERY:
fprintf(f, " %s ", flag); fprintf(f, " %s ", flag);
if (*code >= OP_TYPESTAR) if (*code >= OP_TYPESTAR)
{ {
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) if (code[1] == OP_PROP || code[1] == OP_NOTPROP)

View File

@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
*/ */
/* This module contains internal functions for comparing and finding the length /* This module contains internal functions for comparing and finding the length
of strings. These are used instead of strcmp() etc because the standard of strings. These are used instead of strcmp() etc because the standard
functions work only on 8-bit data. */ functions work only on 8-bit data. */
@ -54,7 +54,7 @@ functions work only on 8-bit data. */
* Compare two zero-terminated PCRE2 strings * * Compare two zero-terminated PCRE2 strings *
*************************************************/ *************************************************/
/* /*
Arguments: Arguments:
str1 first string str1 first string
str2 second string str2 second string
@ -80,7 +80,7 @@ return 0;
* Compare zero-terminated PCRE2 & 8-bit strings * * Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/ *************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as /* As the 8-bit string is almost always a literal, its type is specified as
const char *. const char *.
Arguments: Arguments:
@ -108,7 +108,7 @@ return 0;
* Compare two PCRE2 strings, given a length * * Compare two PCRE2 strings, given a length *
*************************************************/ *************************************************/
/* /*
Arguments: Arguments:
str1 first string str1 first string
str2 second string str2 second string
@ -135,7 +135,7 @@ return 0;
* Compare PCRE2 string to 8-bit string by length * * Compare PCRE2 string to 8-bit string by length *
*************************************************/ *************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as /* As the 8-bit string is almost always a literal, its type is specified as
const char *. const char *.
Arguments: Arguments:
@ -164,7 +164,7 @@ return 0;
* Find the length of a PCRE2 string * * Find the length of a PCRE2 string *
*************************************************/ *************************************************/
/* /*
Argument: the string Argument: the string
Returns: the length Returns: the length
*/ */
@ -185,9 +185,9 @@ return c;
/* Arguments: /* Arguments:
str1 buffer to receive the string str1 buffer to receive the string
str2 8-bit string to be copied str2 8-bit string to be copied
Returns: the number of code units used (excluding trailing zero) Returns: the number of code units used (excluding trailing zero)
*/ */
int int
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2) PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)

View File

@ -74,7 +74,7 @@ Arguments:
code pointer to start of group (the bracket) code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern's code startcode pointer to start of the whole pattern's code
recurse_depth RECURSE depth recurse_depth RECURSE depth
utf UTF flag utf UTF flag
Returns: the minimum length Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered -1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@ -388,10 +388,10 @@ for (;;)
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0) if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{ {
int count = GET2(cc, 1+IMM2_SIZE); int count = GET2(cc, 1+IMM2_SIZE);
PCRE2_UCHAR *slot = PCRE2_UCHAR *slot =
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
GET2(cc, 1) * re->name_entry_size; GET2(cc, 1) * re->name_entry_size;
d = INT_MAX; d = INT_MAX;
while (count-- > 0) while (count-- > 0)
{ {
@ -579,7 +579,7 @@ for (;;)
*************************************************/ *************************************************/
/* Given a character, set its first code unit's bit in the table, and also the /* Given a character, set its first code unit's bit in the table, and also the
corresponding bit for the other version of a letter if we are caseless. corresponding bit for the other version of a letter if we are caseless.
Arguments: Arguments:
re points to the regex block re points to the regex block
@ -590,20 +590,20 @@ Arguments:
Returns: pointer after the character Returns: pointer after the character
*/ */
static PCRE2_SPTR static PCRE2_SPTR
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf) set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
{ {
uint32_t c = *p++; /* First code unit */ uint32_t c = *p++; /* First code unit */
(void)utf; /* Stop compiler warning when UTF not supported */ (void)utf; /* Stop compiler warning when UTF not supported */
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for /* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
0xff. */ 0xff. */
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 0xff) SET_BIT(0xff); else if (c > 0xff) SET_BIT(0xff); else
#endif #endif
SET_BIT(c); SET_BIT(c);
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find /* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
the end of the character, even when caseless. */ the end of the character, even when caseless. */
@ -617,7 +617,7 @@ if (utf)
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p); if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
#endif #endif
} }
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
/* If caseless, handle the other case of the character. */ /* If caseless, handle the other case of the character. */
@ -669,7 +669,7 @@ static void
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{ {
register uint32_t c; register uint32_t c;
for (c = 0; c < table_limit; c++) for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type]; re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit == 32) return; if (table_limit == 32) return;
@ -710,7 +710,7 @@ static void
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit) set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{ {
register uint32_t c; register uint32_t c;
for (c = 0; c < table_limit; c++) for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]); re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff; if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
@ -724,10 +724,10 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
*************************************************/ *************************************************/
/* This function scans a compiled unanchored expression recursively and /* This function scans a compiled unanchored expression recursively and
attempts to build a bitmap of the set of possible starting code units whose attempts to build a bitmap of the set of possible starting code units whose
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
we pass a value of 16 rather than 32 as the final argument. (See comments in we pass a value of 16 rather than 32 as the final argument. (See comments in
those functions for the reason.) those functions for the reason.)
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
@ -769,8 +769,8 @@ do
while (try_next) /* Loop for items in this branch */ while (try_next) /* Loop for items in this branch */
{ {
int rc; int rc;
uint8_t *classmap = NULL; uint8_t *classmap = NULL;
switch(*tcode) switch(*tcode)
{ {
/* If we reach something we don't understand, it means a new opcode has /* If we reach something we don't understand, it means a new opcode has
@ -854,31 +854,31 @@ do
case OP_THEN: case OP_THEN:
case OP_THEN_ARG: case OP_THEN_ARG:
return SSB_FAIL; return SSB_FAIL;
/* A "real" property test implies no starting bits, but the fake property /* A "real" property test implies no starting bits, but the fake property
PT_CLIST identifies a list of characters. These lists are short, as they PT_CLIST identifies a list of characters. These lists are short, as they
are used for characters with more than one "other case", so there is no are used for characters with more than one "other case", so there is no
point in recognizing them for OP_NOTPROP. */ point in recognizing them for OP_NOTPROP. */
case OP_PROP: case OP_PROP:
if (tcode[1] != PT_CLIST) return SSB_FAIL; if (tcode[1] != PT_CLIST) return SSB_FAIL;
{ {
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2]; const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
while ((c = *p++) < NOTACHAR) while ((c = *p++) < NOTACHAR)
{ {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf) if (utf)
{ {
PCRE2_UCHAR buff[6]; PCRE2_UCHAR buff[6];
(void)PRIV(ord2utf)(c, buff); (void)PRIV(ord2utf)(c, buff);
c = buff[0]; c = buff[0];
} }
#endif #endif
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c); if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
} }
} }
try_next = FALSE; try_next = FALSE;
break; break;
/* We can ignore word boundary tests. */ /* We can ignore word boundary tests. */
@ -1032,14 +1032,14 @@ do
SET_BIT(CHAR_HT); SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE); SET_BIT(CHAR_SPACE);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for 0xA0 and for code units >= 255, independently of UTF. */ the bits for 0xA0 and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(0xA0); SET_BIT(0xA0);
SET_BIT(0xFF); SET_BIT(0xFF);
#else #else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */ units of horizontal space characters. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
@ -1052,7 +1052,7 @@ do
} }
else else
#endif #endif
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
the code is EBCDIC. */ the code is EBCDIC. */
{ {
#ifndef EBCDIC #ifndef EBCDIC
@ -1060,7 +1060,7 @@ do
#endif /* Not EBCDIC */ #endif /* Not EBCDIC */
} }
#endif /* 8-bit support */ #endif /* 8-bit support */
try_next = FALSE; try_next = FALSE;
break; break;
@ -1071,16 +1071,16 @@ do
SET_BIT(CHAR_FF); SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR); SET_BIT(CHAR_CR);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for NEL and for code units >= 255, independently of UTF. */ the bits for NEL and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(CHAR_NEL); SET_BIT(CHAR_NEL);
SET_BIT(0xFF); SET_BIT(0xFF);
#else #else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */ units of vertical space characters. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
@ -1093,8 +1093,8 @@ do
{ {
SET_BIT(CHAR_NEL); SET_BIT(CHAR_NEL);
} }
#endif /* 8-bit support */ #endif /* 8-bit support */
try_next = FALSE; try_next = FALSE;
break; break;
@ -1166,7 +1166,7 @@ do
case OP_ANY: case OP_ANY:
case OP_ALLANY: case OP_ALLANY:
return SSB_FAIL; return SSB_FAIL;
case OP_HSPACE: case OP_HSPACE:
SET_BIT(CHAR_HT); SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE); SET_BIT(CHAR_SPACE);
@ -1178,7 +1178,7 @@ do
SET_BIT(0xA0); SET_BIT(0xA0);
SET_BIT(0xFF); SET_BIT(0xFF);
#else #else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */ units of horizontal space characters. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
@ -1191,7 +1191,7 @@ do
} }
else else
#endif #endif
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless /* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
the code is EBCDIC. */ the code is EBCDIC. */
{ {
#ifndef EBCDIC #ifndef EBCDIC
@ -1208,16 +1208,16 @@ do
SET_BIT(CHAR_FF); SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR); SET_BIT(CHAR_CR);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set /* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for NEL and for code units >= 255, independently of UTF. */ the bits for NEL and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8 #if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(CHAR_NEL); SET_BIT(CHAR_NEL);
SET_BIT(0xFF); SET_BIT(0xFF);
#else #else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code /* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */ units of vertical space characters. */
#ifdef SUPPORT_UNICODE #ifdef SUPPORT_UNICODE
if (utf) if (utf)
{ {
@ -1230,7 +1230,7 @@ do
{ {
SET_BIT(CHAR_NEL); SET_BIT(CHAR_NEL);
} }
#endif /* 8-bit support */ #endif /* 8-bit support */
break; break;
case OP_NOT_DIGIT: case OP_NOT_DIGIT:
@ -1260,8 +1260,8 @@ do
tcode += 2; tcode += 2;
break; break;
/* Extended class: if there are any property checks, or if this is a /* Extended class: if there are any property checks, or if this is a
negative XCLASS without a map, give up. If there are no property checks, negative XCLASS without a map, give up. If there are no property checks,
there must be wide characters on the XCLASS list, because otherwise an there must be wide characters on the XCLASS list, because otherwise an
XCLASS would not have been created. This means that code points >= 255 XCLASS would not have been created. This means that code points >= 255
@ -1270,19 +1270,19 @@ do
#ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS: case OP_XCLASS:
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 || if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT) (tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
return SSB_FAIL; return SSB_FAIL;
/* We have a positive XCLASS or a negative one with a map. Set up the /* We have a positive XCLASS or a negative one with a map. Set up the
map pointer if there is one, and fall through. */ map pointer if there is one, and fall through. */
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL : classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
(uint8_t *)(tcode + 1 + LINK_SIZE + 1); (uint8_t *)(tcode + 1 + LINK_SIZE + 1);
#endif #endif
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are /* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
because it starts a character with a value > 255. In 8-bit non-UTF mode, because it starts a character with a value > 255. In 8-bit non-UTF mode,
there is no difference between CLASS and NCLASS. In all other wide there is no difference between CLASS and NCLASS. In all other wide
character modes, set the 0xFF bit to indicate code units >= 255. */ character modes, set the 0xFF bit to indicate code units >= 255. */
@ -1298,26 +1298,26 @@ do
#endif #endif
/* Fall through */ /* Fall through */
/* Enter here for a positive non-XCLASS. If we have fallen through from /* Enter here for a positive non-XCLASS. If we have fallen through from
an XCLASS, classmap will already be set; just advance the code pointer. an XCLASS, classmap will already be set; just advance the code pointer.
Otherwise, set up classmap for a non-XCLASS and advance past it. */ Otherwise, set up classmap for a non-XCLASS and advance past it. */
case OP_CLASS: case OP_CLASS:
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
{ {
classmap = (uint8_t *)(++tcode); classmap = (uint8_t *)(++tcode);
tcode += 32 / sizeof(PCRE2_UCHAR); tcode += 32 / sizeof(PCRE2_UCHAR);
} }
/* When wide characters are supported, classmap may be NULL. In UTF-8 /* When wide characters are supported, classmap may be NULL. In UTF-8
(sic) mode, the bits in a class bit map correspond to character values, (sic) mode, the bits in a class bit map correspond to character values,
not to byte values. However, the bit map we are constructing is for byte not to byte values. However, the bit map we are constructing is for byte
values. So we have to do a conversion for characters whose code point is values. So we have to do a conversion for characters whose code point is
greater than 127. In fact, there are only two possible starting bytes for greater than 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */ characters in the range 128 - 255. */
if (classmap != NULL) if (classmap != NULL)
{ {
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf) if (utf)
{ {
@ -1334,11 +1334,11 @@ do
} }
else else
#endif #endif
/* In all modes except UTF-8, the two bit maps are compatible. */ /* In all modes except UTF-8, the two bit maps are compatible. */
{ {
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c]; for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
} }
} }
/* Act on what follows the class. For a zero minimum repeat, continue; /* Act on what follows the class. For a zero minimum repeat, continue;
@ -1384,13 +1384,13 @@ return yield;
*************************************************/ *************************************************/
/* This function is handed a compiled expression that it must study to produce /* This function is handed a compiled expression that it must study to produce
information that will speed up the matching. information that will speed up the matching.
Argument: points to the compiled expression Argument: points to the compiled expression
Returns: 0 normally; non-zero should never normally occur Returns: 0 normally; non-zero should never normally occur
1 unknown opcode in set_start_bits 1 unknown opcode in set_start_bits
2 missing capturing bracket 2 missing capturing bracket
3 unknown opcode in find_minlength 3 unknown opcode in find_minlength
*/ */
int int
@ -1402,7 +1402,7 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
/* Find start of compiled code */ /* Find start of compiled code */
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) + code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count; re->name_entry_size * re->name_count;
/* For an anchored pattern, or an unanchored pattern that has a first code /* For an anchored pattern, or an unanchored pattern that has a first code
@ -1422,17 +1422,17 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
switch(min = find_minlength(re, code, code, 0, utf)) switch(min = find_minlength(re, code, code, 0, utf))
{ {
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */ case -1: /* \C in UTF mode or (*ACCEPT) was encountered */
break; break;
case -2: case -2:
return 2; /* missing capturing bracket */ return 2; /* missing capturing bracket */
case -3: case -3:
return 3; /* unrecognized opcode */ return 3; /* unrecognized opcode */
default: default:
re->minlength = min; re->minlength = min;
break; break;
} }
return 0; return 0;

View File

@ -81,7 +81,7 @@ for (entry = first; entry <= last; entry += entrysize)
{ {
uint16_t n = GET2(entry, 0); uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr); return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
} }
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
} }
@ -108,7 +108,7 @@ Returns: if successful: 0
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data, pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr) unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{ {
PCRE2_SIZE left, right; PCRE2_SIZE left, right;
@ -119,7 +119,7 @@ if (stringnumber >= match_data->oveccount ||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET) (left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1]; right = match_data->ovector[stringnumber*2+1];
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
while (left < right) buffer[p++] = subject[left++]; while (left < right) buffer[p++] = subject[left++];
buffer[p] = 0; buffer[p] = 0;
*sizeptr = p; *sizeptr = p;
@ -140,7 +140,7 @@ Arguments:
match_data pointer to match_data match_data pointer to match_data
stringname the name of the required substring stringname the name of the required substring
stringptr where to put the pointer to the new memory stringptr where to put the pointer to the new memory
sizeptr where to put the length of the substring sizeptr where to put the length of the substring
Returns: if successful: zero Returns: if successful: zero
if not successful, a negative value: if not successful, a negative value:
@ -162,7 +162,7 @@ for (entry = first; entry <= last; entry += entrysize)
{ {
uint16_t n = GET2(entry, 0); uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr); return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
} }
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
} }
@ -180,7 +180,7 @@ Arguments:
match_data points to match data match_data points to match data
stringnumber the number of the required substring stringnumber the number of the required substring
stringptr where to put a pointer to the new memory stringptr where to put a pointer to the new memory
sizeptr where to put the size of the substring sizeptr where to put the size of the substring
Returns: if successful: zero Returns: if successful: zero
if not successful a negative error code: if not successful a negative error code:
@ -189,7 +189,7 @@ Returns: if successful: zero
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data, pcre2_substring_get_bynumber(pcre2_match_data *match_data,
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr) unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{ {
PCRE2_SIZE left, right; PCRE2_SIZE left, right;
@ -204,8 +204,8 @@ if (stringnumber >= match_data->oveccount ||
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1]; right = match_data->ovector[stringnumber*2+1];
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); (right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
if (block == NULL) return PCRE2_ERROR_NOMEMORY; if (block == NULL) return PCRE2_ERROR_NOMEMORY;
yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl)); yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl));
@ -222,7 +222,7 @@ return 0;
* Free memory obtained by get_substring * * Free memory obtained by get_substring *
*************************************************/ *************************************************/
/* /*
Argument: the result of a previous pcre2_substring_get_byxxx() Argument: the result of a previous pcre2_substring_get_byxxx()
Returns: nothing Returns: nothing
*/ */
@ -246,7 +246,7 @@ permits duplicate names, the first substring that is set is chosen.
Arguments: Arguments:
match_data pointer to match data match_data pointer to match data
stringname the name of the required substring stringname the name of the required substring
sizeptr where to put the length sizeptr where to put the length
Returns: 0 if successful, else a negative error number Returns: 0 if successful, else a negative error number
*/ */
@ -265,7 +265,7 @@ for (entry = first; entry <= last; entry += entrysize)
{ {
uint16_t n = GET2(entry, 0); uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET) if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_length_bynumber(match_data, n, sizeptr); return pcre2_substring_length_bynumber(match_data, n, sizeptr);
} }
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
} }
@ -281,7 +281,7 @@ return PCRE2_ERROR_NOSUBSTRING;
Arguments: Arguments:
match_data pointer to match data match_data pointer to match data
stringnumber the number of the required substring stringnumber the number of the required substring
sizeptr where to put the length sizeptr where to put the length
Returns: 0 if successful, else a negative error number Returns: 0 if successful, else a negative error number
*/ */
@ -296,7 +296,7 @@ if (stringnumber >= match_data->oveccount ||
return PCRE2_ERROR_NOSUBSTRING; return PCRE2_ERROR_NOSUBSTRING;
*sizeptr = match_data->ovector[stringnumber*2 + 1] - *sizeptr = match_data->ovector[stringnumber*2 + 1] -
match_data->ovector[stringnumber*2]; match_data->ovector[stringnumber*2];
return 0; return 0;
} }
@ -307,23 +307,23 @@ return 0;
/* This function gets one chunk of memory and builds a list of pointers and all /* This function gets one chunk of memory and builds a list of pointers and all
the captured substrings in it. A NULL pointer is put on the end of the list. the captured substrings in it. A NULL pointer is put on the end of the list.
The substrings are zero-terminated, but also, if the final argument is The substrings are zero-terminated, but also, if the final argument is
non-NULL, a list of lengths is also returned. This allows binary data to be non-NULL, a list of lengths is also returned. This allows binary data to be
handled. handled.
Arguments: Arguments:
match_data points to the match data match_data points to the match data
listptr set to point to the list of pointers listptr set to point to the list of pointers
lengthsptr set to point to the list of lengths (may be NULL) lengthsptr set to point to the list of lengths (may be NULL)
Returns: if successful: 0 Returns: if successful: 0
if not successful, a negative error code: if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory, PCRE2_ERROR_NOMEMORY: failed to get memory,
or a match failure code or a match failure code
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
PCRE2_SIZE **lengthsptr) PCRE2_SIZE **lengthsptr)
{ {
int i, count, count2; int i, count, count2;
@ -343,22 +343,22 @@ if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
for (i = 0; i < count2; i += 2) for (i = 0; i < count2; i += 2)
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1); size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1);
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data); memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY; if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl)); *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1)); lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
if (lengthsptr == NULL) if (lengthsptr == NULL)
{ {
sp = (PCRE2_UCHAR *)lensp; sp = (PCRE2_UCHAR *)lensp;
lensp = NULL; lensp = NULL;
} }
else else
{ {
*lengthsptr = lensp; *lengthsptr = lensp;
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count); sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
} }
for (i = 0; i < count2; i += 2) for (i = 0; i < count2; i += 2)
{ {
@ -398,9 +398,9 @@ memctl->free(memctl, memctl->memory_data);
* Find (multiple) entries for named string * * Find (multiple) entries for named string *
*************************************************/ *************************************************/
/* This function scans the nametable for a given name, using binary chop. It /* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a group with the given name. If duplicate names are given, the number of a group with the given name. If duplicate names are
permitted, this may not be unique. permitted, this may not be unique.
Arguments: Arguments:
@ -428,11 +428,11 @@ while (top > bot)
uint16_t mid = (top + bot) / 2; uint16_t mid = (top + bot) / 2;
PCRE2_SPTR entry = nametable + entrysize*mid; PCRE2_SPTR entry = nametable + entrysize*mid;
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE); int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
if (c == 0) if (c == 0)
{ {
PCRE2_SPTR first; PCRE2_SPTR first;
PCRE2_SPTR last; PCRE2_SPTR last;
PCRE2_SPTR lastentry; PCRE2_SPTR lastentry;
if (firstptr == NULL) return GET2(entry, 0); if (firstptr == NULL) return GET2(entry, 0);
lastentry = nametable + entrysize * (code->name_count - 1); lastentry = nametable + entrysize * (code->name_count - 1);
first = last = entry; first = last = entry;
@ -447,7 +447,7 @@ while (top > bot)
last += entrysize; last += entrysize;
} }
*firstptr = first; *firstptr = first;
*lastptr = last; *lastptr = last;
return entrysize; return entrysize;
} }
if (c > 0) bot = mid + 1; else top = mid; if (c > 0) bot = mid + 1; else top = mid;
@ -462,7 +462,7 @@ return PCRE2_ERROR_NOSUBSTRING;
*************************************************/ *************************************************/
/* This function is a convenience wrapper for pcre2_substring_nametable_scan() /* This function is a convenience wrapper for pcre2_substring_nametable_scan()
when it is known that names are unique. If there are duplicate names, it is not when it is known that names are unique. If there are duplicate names, it is not
defined which number is returned. defined which number is returned.
Arguments: Arguments:
@ -474,7 +474,7 @@ Returns: the number of the named parenthesis, or a negative number
*/ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code *code, pcre2_substring_number_from_name(const pcre2_code *code,
PCRE2_SPTR stringname) PCRE2_SPTR stringname)
{ {
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL); return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);

View File

@ -232,7 +232,7 @@ enum {
ucp_Takri, ucp_Takri,
/* New for Unicode 7.0.0: */ /* New for Unicode 7.0.0: */
ucp_Bassa_Vah, ucp_Bassa_Vah,
ucp_Caucasian_Albanian, ucp_Caucasian_Albanian,
ucp_Duployan, ucp_Duployan,
ucp_Elbasan, ucp_Elbasan,
ucp_Grantha, ucp_Grantha,

View File

@ -154,11 +154,11 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string); *erroroffset = (int)(p - string);
switch(ab - length) switch(ab - length)
{ {
case 1: return PCRE2_ERROR_UTF8_ERR1; case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2; case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3; case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4; case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5; case 5: return PCRE2_ERROR_UTF8_ERR5;
} }
} }
length -= ab; /* Length remaining */ length -= ab; /* Length remaining */
@ -314,7 +314,7 @@ return 0;
/* ----------------- Check a UTF-16 string ----------------- */ /* ----------------- Check a UTF-16 string ----------------- */
#elif PCRE2_CODE_UNIT_WIDTH == 16 #elif PCRE2_CODE_UNIT_WIDTH == 16
/* There's not so much work, nor so many errors, for UTF-16. /* There's not so much work, nor so many errors, for UTF-16.
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string

View File

@ -60,7 +60,7 @@ might contain codepoints above 255 and/or Unicode properties.
Arguments: Arguments:
c the character c the character
data points to the flag code unit of the XCLASS data data points to the flag code unit of the XCLASS data
utf TRUE if in UTF mode utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE Returns: TRUE if character matches, else FALSE
*/ */
@ -261,7 +261,7 @@ while ((t = *data++) != XCL_END)
data += 2; data += 2;
} }
#else #else
(void)utf; /* Avoid compiler warning */ (void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UNICODE */ #endif /* SUPPORT_UNICODE */
} }

View File

@ -8,7 +8,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API. incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library. width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system In Unix-like environments, if PCRE2 is installed in your standard system
@ -39,8 +39,8 @@ the following line. */
/* #define PCRE2_STATIC */ /* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses /* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */ such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8 #define PCRE2_CODE_UNIT_WIDTH 8
@ -124,7 +124,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile( re = pcre2_compile(
pattern, /* the pattern */ pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */ 0, /* default options */
&errornumber, /* for error number */ &errornumber, /* for error number */
&erroroffset, /* for error offset */ &erroroffset, /* for error offset */
@ -134,9 +134,9 @@ re = pcre2_compile(
if (re == NULL) if (re == NULL)
{ {
PCRE2_UCHAR buffer[256]; PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer)); pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset, printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer); buffer);
return 1; return 1;
} }
@ -180,7 +180,7 @@ if (rc < 0)
return 1; return 1;
} }
/* Match succeeded. Get a pointer to the output vector, where string offsets are /* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */ stored. */
ovector = pcre2_get_ovector_pointer(match_data); ovector = pcre2_get_ovector_pointer(match_data);
@ -193,7 +193,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
* captured. * * captured. *
*************************************************************************/ *************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used /* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */ pcre2_match_data_create_from_pattern() above. */
if (rc == 0) if (rc == 0)
@ -244,7 +244,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
&name_entry_size); /* where to put the answer */ &name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name, /* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */ bytes, most significant first. */
tabptr = name_table; tabptr = name_table;
@ -289,7 +289,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
if (!find_all) /* Check for -g */ if (!find_all) /* Check for -g */
{ {
pcre2_match_data_free(match_data); /* Release the memory that was used */ pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */ pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */ return 0; /* Exit the program. */
} }
@ -307,7 +307,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline); (void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY || crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF || newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF; newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */ /* Loop for second and subsequent matches */

View File

@ -450,7 +450,7 @@ pcre2grep_exit(int rc)
if (resource_error) if (resource_error)
{ {
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit " fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, "was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
PCRE2_ERROR_RECURSIONLIMIT); PCRE2_ERROR_RECURSIONLIMIT);
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
} }
@ -485,7 +485,7 @@ if (strlen(s) > MAXPATLEN)
{ {
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n", fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
MAXPATLEN); MAXPATLEN);
free(p); free(p);
return NULL; return NULL;
} }
p->next = NULL; p->next = NULL;
@ -2381,7 +2381,7 @@ switch(letter)
unsigned char buffer[128]; unsigned char buffer[128];
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer); (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
fprintf(stdout, "pcre2grep version %s\n", buffer); fprintf(stdout, "pcre2grep version %s\n", buffer);
} }
pcre2grep_exit(0); pcre2grep_exit(0);
break; break;
@ -2472,7 +2472,7 @@ if ((popts & PO_FIXED_STRINGS) != 0)
} }
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]); sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset, p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
compile_context); compile_context);
if (p->compiled != NULL) return TRUE; if (p->compiled != NULL) return TRUE;
@ -2555,11 +2555,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
afterwards, as a precaution against any later code trying to use it. */ afterwards, as a precaution against any later code trying to use it. */
*patlastptr = add_pattern(buffer, *patlastptr); *patlastptr = add_pattern(buffer, *patlastptr);
if (*patlastptr == NULL) if (*patlastptr == NULL)
{ {
if (f != stdin) fclose(f); if (f != stdin) fclose(f);
return FALSE; return FALSE;
} }
if (*patptr == NULL) *patptr = *patlastptr; if (*patptr == NULL) *patptr = *patlastptr;
/* This loop is needed because compiling a "pattern" when -F is set may add /* This loop is needed because compiling a "pattern" when -F is set may add
@ -2571,10 +2571,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
{ {
if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename, if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
linenumber)) linenumber))
{ {
if (f != stdin) fclose(f); if (f != stdin) fclose(f);
return FALSE; return FALSE;
} }
(*patlastptr)->string = NULL; /* Insurance */ (*patlastptr)->string = NULL; /* Insurance */
if ((*patlastptr)->next == NULL) break; if ((*patlastptr)->next == NULL) break;
*patlastptr = (*patlastptr)->next; *patlastptr = (*patlastptr)->next;
@ -2622,7 +2622,7 @@ for (i = 1; i < argc; i++)
char *option_data = (char *)""; /* default to keep compiler happy */ char *option_data = (char *)""; /* default to keep compiler happy */
BOOL longop; BOOL longop;
BOOL longopwasequals = FALSE; BOOL longopwasequals = FALSE;
if (argv[i][0] != '-') break; if (argv[i][0] != '-') break;
/* If we hit an argument that is just "-", it may be a reference to STDIN, /* If we hit an argument that is just "-", it may be a reference to STDIN,
@ -2925,7 +2925,7 @@ for (i = 1; i < argc; i++)
else *((int *)op->dataptr) = n; else *((int *)op->dataptr) = n;
} }
} }
/* Options have been decoded. If -C was used, its value is used as a default /* Options have been decoded. If -C was used, its value is used as a default
for -A and -B. */ for -A and -B. */
@ -2946,15 +2946,15 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) ||
"and/or --line-offsets\n"); "and/or --line-offsets\n");
pcre2grep_exit(usage(2)); pcre2grep_exit(usage(2));
} }
/* Put limits into the match data block. */ /* Put limits into the match data block. */
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit); if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
if (only_matching != NULL || file_offsets || line_offsets) if (only_matching != NULL || file_offsets || line_offsets)
show_only_matching = TRUE; show_only_matching = TRUE;
/* If a locale has not been provided as an option, see if the LC_CTYPE or /* If a locale has not been provided as an option, see if the LC_CTYPE or
LC_ALL environment variable is set, and if so, use it. */ LC_ALL environment variable is set, and if so, use it. */
@ -2980,7 +2980,7 @@ if (locale != NULL)
locale, locale_from); locale, locale_from);
goto EXIT2; goto EXIT2;
} }
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL)); pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
} }
/* Sort out colouring */ /* Sort out colouring */
@ -3007,27 +3007,27 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
if (newline_arg != NULL) if (newline_arg != NULL)
{ {
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *)); for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
endlinetype++) endlinetype++)
{ {
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break; if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
} }
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *))) if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
pcre2_set_newline(compile_context, endlinetype); pcre2_set_newline(compile_context, endlinetype);
else else
{ {
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n", fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
newline_arg); newline_arg);
goto EXIT2; goto EXIT2;
} }
} }
/* Find default newline convention */ /* Find default newline convention */
else else
{ {
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype); (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
} }
/* Interpret the text values for -d and -D */ /* Interpret the text values for -d and -D */

View File

@ -68,7 +68,7 @@ already set. */
#include "pcre2_internal.h" #include "pcre2_internal.h"
#include "pcre2posix.h" #include "pcre2posix.h"
/* Table to translate PCRE2 compile time error codes into POSIX error codes. /* Table to translate PCRE2 compile time error codes into POSIX error codes.
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
codes: most go to REG_BADPAT. The second table lists, in pairs, those that codes: most go to REG_BADPAT. The second table lists, in pairs, those that
don't. */ don't. */
@ -89,7 +89,7 @@ static const int eint1[] = {
REG_ASSERT, /* internal error: unexpected repeat */ REG_ASSERT, /* internal error: unexpected repeat */
REG_BADPAT, /* unrecognized character after (? or (?- */ REG_BADPAT, /* unrecognized character after (? or (?- */
REG_BADPAT, /* POSIX named classes are supported only within a class */ REG_BADPAT, /* POSIX named classes are supported only within a class */
REG_BADPAT, /* POSIX collating elements are not supported */ REG_BADPAT, /* POSIX collating elements are not supported */
REG_EPAREN, /* missing ) */ REG_EPAREN, /* missing ) */
/* 15 */ /* 15 */
REG_ESUBREG, /* reference to non-existent subpattern */ REG_ESUBREG, /* reference to non-existent subpattern */
@ -103,7 +103,7 @@ static const int eint1[] = {
REG_EPAREN, /* unmatched closing parenthesis */ REG_EPAREN, /* unmatched closing parenthesis */
REG_ASSERT /* internal error: code overflow */ REG_ASSERT /* internal error: code overflow */
}; };
static const int eint2[] = { static const int eint2[] = {
30, REG_ECTYPE, /* unknown POSIX class name */ 30, REG_ECTYPE, /* unknown POSIX class name */
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */ 32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
@ -216,14 +216,14 @@ if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP; if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY; if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options, preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
&errorcode, &erroffset, NULL); &errorcode, &erroffset, NULL);
preg->re_erroffset = erroffset; preg->re_erroffset = erroffset;
if (preg->re_pcre2_code == NULL) if (preg->re_pcre2_code == NULL)
{ {
unsigned int i; unsigned int i;
if (errorcode < 0) return REG_BADPAT; /* UTF error */ if (errorcode < 0) return REG_BADPAT; /* UTF error */
errorcode -= COMPILE_ERROR_BASE; errorcode -= COMPILE_ERROR_BASE;
if (errorcode < (int)(sizeof(eint1)/sizeof(const int))) if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
return eint1[errorcode]; return eint1[errorcode];
@ -232,7 +232,7 @@ if (preg->re_pcre2_code == NULL)
return REG_BADPAT; return REG_BADPAT;
} }
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code, (void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
PCRE2_INFO_CAPTURECOUNT, &re_nsub); PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub; preg->re_nsub = (size_t)re_nsub;
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1; if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
@ -288,7 +288,7 @@ else
eo = (int)strlen(string); eo = (int)strlen(string);
} }
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code, rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL); (PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
/* Successful match */ /* Successful match */

View File

@ -95,7 +95,7 @@ enum {
typedef struct { typedef struct {
void *re_pcre2_code; void *re_pcre2_code;
void *re_match_data; void *re_match_data;
size_t re_nsub; size_t re_nsub;
size_t re_erroffset; size_t re_erroffset;
} regex_t; } regex_t;

View File

@ -4797,9 +4797,9 @@ for (gmatched = 0;; gmatched++)
PCRE2_SIZE length; PCRE2_SIZE length;
uint32_t copybuffer[256]; uint32_t copybuffer[256];
int namelen = strlen((const char *)nptr); int namelen = strlen((const char *)nptr);
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
PCRE2_SIZE cnl = namelen; PCRE2_SIZE cnl = namelen;
#endif #endif
if (namelen == 0) break; if (namelen == 0) break;
#ifdef SUPPORT_PCRE2_8 #ifdef SUPPORT_PCRE2_8
@ -4864,9 +4864,9 @@ for (gmatched = 0;; gmatched++)
void *gotbuffer; void *gotbuffer;
int rc; int rc;
int namelen = strlen((const char *)nptr); int namelen = strlen((const char *)nptr);
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32 #if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
PCRE2_SIZE cnl = namelen; PCRE2_SIZE cnl = namelen;
#endif #endif
if (namelen == 0) break; if (namelen == 0) break;
#ifdef SUPPORT_PCRE2_8 #ifdef SUPPORT_PCRE2_8
@ -5389,25 +5389,25 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
return 1; return 1;
} }
/* Get the PCRE2 and Unicode version number and JIT target information, at the /* Get the PCRE2 and Unicode version number and JIT target information, at the
same time checking that a request for the length gives the same answer. Also same time checking that a request for the length gives the same answer. Also
check lengths for non-string items. */ check lengths for non-string items. */
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) || PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) || PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) || PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) || PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) ||
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int)) PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
{ {
fprintf(stderr, "** Error in pcre2_config(): bad length\n"); fprintf(stderr, "** Error in pcre2_config(): bad length\n");
return 1; return 1;
} }
/* Get buffers from malloc() so that valgrind will check their misuse when /* Get buffers from malloc() so that valgrind will check their misuse when
debugging. They grow automatically when very long lines are read. The 16- debugging. They grow automatically when very long lines are read. The 16-