Tidy a lot of files (remove trailing spaces)

This commit is contained in:
Philip.Hazel 2014-10-20 17:28:49 +00:00
parent 4352f00bb9
commit c3799e750f
64 changed files with 1100 additions and 1113 deletions

View File

@ -382,21 +382,21 @@ SET(PCRE2_SOURCES
${PROJECT_BINARY_DIR}/pcre2_chartables.c
src/pcre2_compile.c
src/pcre2_config.c
src/pcre2_context.c
src/pcre2_context.c
src/pcre2_dfa_match.c
src/pcre2_error.c
src/pcre2_error.c
src/pcre2_jit_compile.c
src/pcre2_jit_match.c
src/pcre2_jit_misc.c
src/pcre2_maketables.c
src/pcre2_match.c
src/pcre2_match_data.c
src/pcre2_match_data.c
src/pcre2_newline.c
src/pcre2_ord2utf.c
src/pcre2_pattern_info.c
src/pcre2_pattern_info.c
src/pcre2_string_utils.c
src/pcre2_study.c
src/pcre2_substring.c
src/pcre2_substring.c
src/pcre2_tables.c
src/pcre2_ucd.c
src/pcre2_valid_utf.c
@ -462,11 +462,11 @@ SET(targets)
IF(PCRE2_BUILD_PCRE2_8)
ADD_LIBRARY(pcre2-8 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-8
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2-8)
ADD_LIBRARY(pcre2posix ${PCRE2POSIX_HEADERS} ${PCRE2POSIX_SOURCES})
SET_PROPERTY(TARGET pcre2posix
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2posix)
TARGET_LINK_LIBRARIES(pcre2posix pcre2-8)
@ -503,7 +503,7 @@ ENDIF(PCRE2_BUILD_PCRE2_16)
IF(PCRE2_BUILD_PCRE2_32)
ADD_LIBRARY(pcre2-32 ${PCRE2_HEADERS} ${PCRE2_SOURCES} ${PROJECT_BINARY_DIR}/config.h)
SET_PROPERTY(TARGET pcre2-32
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=32)
SET(targets ${targets} pcre2-32)
IF(MINGW AND NOT PCRE2_STATIC)
@ -521,7 +521,7 @@ ENDIF(PCRE2_BUILD_PCRE2_32)
IF(PCRE2_BUILD_PCRE2GREP)
ADD_EXECUTABLE(pcre2grep src/pcre2grep.c)
SET_PROPERTY(TARGET pcre2grep
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
PROPERTY COMPILE_DEFINITIONS PCRE2_CODE_UNIT_WIDTH=8)
SET(targets ${targets} pcre2grep)
TARGET_LINK_LIBRARIES(pcre2grep pcre2posix ${PCRE2GREP_LIBS})
ENDIF(PCRE2_BUILD_PCRE2GREP)

View File

@ -5,41 +5,41 @@ Version 10.0 xx-xxxx-2014
-------------------------
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
item 20 for release 8.36.
The code of the library was heavily revised as part of the new API
implementation. Details of each and every modification were not individually
logged. In addition to the API changes, the following changes were made. They
are either new functionality, or bug fixes and other noticeable changes of
The code of the library was heavily revised as part of the new API
implementation. Details of each and every modification were not individually
logged. In addition to the API changes, the following changes were made. They
are either new functionality, or bug fixes and other noticeable changes of
behaviour that were implemented after the code had been forked.
1. The test program, now called pcre2test, was re-specified and almost
1. The test program, now called pcre2test, was re-specified and almost
completely re-written. Its input is not compatible with input for pcretest.
2. Patterns may start with (*NOTEMPTY) or (*NOTEMPTY_ATSTART) to set the
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
PCRE2_NOTEMPTY or PCRE2_NOTEMPTY_ATSTART options for every subject line that is
matched by that pattern.
3. For the benefit of those who use PCRE2 via some other application, that is,
not writing the function calls themselves, it is possible to check the PCRE2
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
3. For the benefit of those who use PCRE2 via some other application, that is,
not writing the function calls themselves, it is possible to check the PCRE2
version by matching a pattern such as /(?(VERSION>=10.0)yes|no)/ against a
string such as "yesno".
4. There are case-equivalent Unicode characters whose encodings use different
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
theoretically possible for this to happen in UTF-16 too.) If a backreference to
a group containing one of these characters was greedily repeated, and during
4. There are case-equivalent Unicode characters whose encodings use different
numbers of code units in UTF-8. U+023A and U+2C65 are one example. (It is
theoretically possible for this to happen in UTF-16 too.) If a backreference to
a group containing one of these characters was greedily repeated, and during
the match a backtrack occurred, the subject might be backtracked by the wrong
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
number of code units. For example, if /^(\x{23a})\1*(.)/ is matched caselessly
(and in UTF-8 mode) against "\x{23a}\x{2c65}\x{2c65}\x{2c65}", group 2 should
capture the final character, which is the three bytes E2, B1, and A5 in UTF-8.
Incorrect backtracking meant that group 2 captured only the last two bytes.
This bug has been fixed; the new code is slower, but it is used only when the
Incorrect backtracking meant that group 2 captured only the last two bytes.
This bug has been fixed; the new code is slower, but it is used only when the
strings matched by the repetition are not all the same length.
5. A pattern such as /()a/ was not setting the "first character must be 'a'"
information. This applied to any pattern with a group that matched no
information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/.
****

2
NEWS
View File

@ -5,7 +5,7 @@ Version 10.0 xx-xxxx-2014
-------------------------
Version 10.0 is the first release of PCRE2, a revised API for the PCRE library.
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
Changes prior to 10.0 are logged in the ChangeLog file for the old API, up to
item 20 for release 8.36.
****

View File

@ -1,9 +1,9 @@
Building PCRE2 without using autotools
--------------------------------------
This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of
This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of
date.
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators.
When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
pcre2_chartables.c
pcre2_compile.c
pcre2_config.c
pcre2_context.c
pcre2_context.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_jit_compile.c
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
pcre2_pattern_info.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substring.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcre2posix.h file and then
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
@ -394,9 +394,9 @@ required. For details, please see this web site:
There is also a mirror here:
http://www.vsoft-software.com/downloads.html
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
==========================
Last Updated: 28 September 2014

View File

@ -27,7 +27,7 @@
# README & NON-AUTOTOOLS-BUILD
# These files are copied into the doc/html directory, with .txt
# extensions so that they can be hyperlinked from the HTML
# extensions so that they can be hyperlinked from the HTML
# documentation, because some people just go to the HTML without
# looking for text files.
@ -71,7 +71,7 @@ for file in pcre2 pcre2api pcre2build pcre2callout pcre2compat pcre2jit \
# pcre2syntax \
# pcre2precompile pcre2perform pcre2posix pcre2sample \
# pcre2stack ; do
echo " Processing $file.3"
nroff -c -man $file.3 >$file.rawtxt
perl ../CleanTxt <$file.rawtxt >>pcre2.txt
@ -168,17 +168,13 @@ cd ..
echo Documentation done
if [ "$1" = "doc" ] ; then exit; fi
# FIXME pro tem only do docs
exit
# These files are detrailed; do not detrail the test data because there may be
# significant trailing spaces. Do not detrail RunTest.bat, because it has CRLF
# line endings and the detrail script removes all trailing white space. The
# configure files are also omitted from the detrailing.
# configure files are also omitted from the detrailing.
files="\
Makefile.am \
Makefile.in \
configure.ac \
README \
LICENCE \
@ -195,54 +191,45 @@ files="\
RunGrepTest \
RunTest \
pcre2-config.in \
libpcre.pc.in \
libpcre16.pc.in \
libpcre32.pc.in \
libpcreposix.pc.in \
libpcrecpp.pc.in \
config.h.in \
pcre2_chartables.c.dist \
pcre2demo.c \
pcre2grep.c \
pcre2test.c \
dftables.c \
pcre2posix.c \
pcre2posix.h \
pcre2.h.in \
pcre2_internal.h \
pcre2_byte_order.c \
pcre2_compile.c \
pcre2_config.c \
pcre2_dfa_exec.c \
pcre2_exec.c \
pcre2_fullinfo.c \
pcre2_get.c \
pcre2_globals.c \
pcre2_jit_compile.c \
pcre2_jit_test.c \
pcre2_maketables.c \
pcre2_newline.c \
pcre2_ord2utf8.c \
pcre16_ord2utf16.c \
pcre32_ord2utf32.c \
pcre2_printint.c \
pcre2_refcount.c \
pcre2_string_utils.c \
pcre2_study.c \
pcre2_tables.c \
pcre2_valid_utf8.c \
pcre2_version.c \
pcre2_xclass.c \
pcre16_utf16_utils.c \
pcre32_utf32_utils.c \
pcre16_valid_utf16.c \
pcre32_valid_utf32.c \
perltest.pl \
ucp.h \
makevp.bat \
pcre.def \
libpcre.def \
libpcreposix.def"
libpcre2-8.pc.in \
libpcre2-16.pc.in \
libpcre2-32.pc.in \
libpcre2-posix.pc.in \
src/dftables.c \
src/pcre2.h.in \
src/pcre2_auto_possess.c \
src/pcre2_compile.c \
src/pcre2_config.c \
src/pcre2_context.c \
src/pcre2_dfa_match.c \
src/pcre2_error.c \
src/pcre2_internal.h \
src/pcre2_intmodedep.h \
src/pcre2_jit_compile.c \
src/pcre2_jit_match.c \
src/pcre2_jit_misc.c \
src/pcre2_jit_test.c \
src/pcre2_maketables.c \
src/pcre2_match.c \
src/pcre2_match_data.c \
src/pcre2_newline.c \
src/pcre2_ord2utf.c \
src/pcre2_pattern_info.c \
src/pcre2_printint.c \
src/pcre2_string_utils.c \
src/pcre2_study.c \
src/pcre2_substring.c \
src/pcre2_tables.c \
src/pcre2_ucd.c \
src/pcre2_ucp.h \
src/pcre2_valid_utf.c \
src/pcre2_xclass.c \
src/pcre2demo.c \
src/pcre2grep.c \
src/pcre2posix.c \
src/pcre2posix.h \
src/pcre2test.c"
echo Detrailing
perl ./Detrail $files doc/p* doc/html/*

46
README
View File

@ -1,7 +1,7 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-implementation of the original PCRE library with an entirely new
PCRE2 is a re-implementation of the original PCRE library with an entirely new
API. The latest release of PCRE2 is always available in three alternative
formats from:
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the
There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here:
@ -41,7 +41,7 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. As this is a new API,
32-bit library, which processes strings of 32-bit values. As this is a new API,
there are as yet no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
configure one library with UTF support and the other without in the same
configuration.
Even when --enable-unicode is included, the use of a UTF encoding still has
to be enabled by an option at run time. When PCRE2 is compiled with this
option, its input can only either be ASCII or UTF-8/16/32, even when running
on EBCDIC platforms. It is not possible to use both --enable-unicode and
--enable-ebcdic at the same time.
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
includes support for the \P, \p, and \X sequences that recognize Unicode
character properties. However, only the basic two-letter properties such as
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around
. In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
@ -452,7 +452,7 @@ prints the version number, and
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
@ -593,7 +593,7 @@ bug in PCRE2.
The third set of tests checks pcre2_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the
default tables. The script uses the "locale" command to check for the
default tables. The script uses the "locale" command to check for the
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
that it finds. If the "locale" command fails, or if its output doesn't include
"fr_FR", "french", or "fr" in the list of available locales, the third test
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
matches any one of them.
The fourth and fifth tests check UTF and Unicode property support, the fourth
being compatible with the perltest.pl script, and the fifth checking
being compatible with the perltest.pl script, and the fifth checking
PCRE2-specific things.
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
generate different output in 8-bit mode. Each pair are for general cases and
Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
modes.
The fourteenth test is run only when JIT support is not available, and the
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
JIT-specific features such as information output from pcre2test about JIT
compilation.
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support,
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support,
respectively.
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
File manifest
-------------
The distribution should contain the files listed below.
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_auto_possess.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_context.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_error.c )
src/pcre2_exec.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substring.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_valid_utf.c )

View File

@ -23,7 +23,7 @@ pcre2grep=$builddir/pcre2grep
if [ ! -x $pcre2grep ] ; then
echo "** $pcre2grep does not exist or is not execuatble."
exit 1
fi
fi
valgrind=
while [ $# -gt 0 ] ; do

View File

@ -126,7 +126,7 @@ fi
checkresult()
{
if [ $1 -ne 0 ] ; then
if [ $1 -ne 0 ] ; then
echo "** pcre2test failed - check testtry"
exit 1
fi

View File

@ -106,7 +106,7 @@ AC_ARG_ENABLE(pcre32,,,enable_pcre32=no)
if test "$enable_pcre8$enable_pcre16$enable_pcre32" != "nonono"
then
echo "** ERROR: Use --[[en|dis]]able-pcre2-[[8|16|32]], not --[[en|dis]]able-pcre[[8|16|32]]"
exit 1
exit 1
fi
# Handle --disable-pcre2-8 (enabled by default)
@ -512,7 +512,7 @@ if test "$enable_jit" = "yes"; then
CC="$PTHREAD_CC"
CFLAGS="$PTHREAD_CFLAGS $CFLAGS"
LIBS="$PTHREAD_LIBS $LIBS"
fi
fi
AC_DEFINE([SUPPORT_JIT], [], [
Define to any value to enable support for Just-In-Time compiling.])
else
@ -538,7 +538,7 @@ if test "$enable_stack_for_recursion" = "no"; then
matching. This can sometimes be a problem on systems that have
stacks of limited size. Define HEAP_MATCH_RECURSE to any value to get a
version that doesn't use recursion in the match() function; instead
it creates its own stack by steam using memory from the heap. For more
it creates its own stack by steam using memory from the heap. For more
detail, see the comments and other stuff just above the match() function.])
fi
@ -559,8 +559,8 @@ if test $with_pcre2grep_bufsize -lt 8192 ; then
with_pcre2grep_bufsize="8192"
else
if test $? -gt 1 ; then
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
fi
AC_MSG_ERROR([Bad value for --with-pcre2grep-bufsize])
fi
fi
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
@ -579,9 +579,9 @@ elif test "$enable_pcre2test_libreadline" = "yes"; then
fi
AC_DEFINE_UNQUOTED([NEWLINE_DEFAULT], [$ac_pcre2_newline_value], [
The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
The value of NEWLINE_DEFAULT determines the default newline character
sequence. PCRE2 client programs can override this by selecting other values
at run time. The valid values are 1 (CR), 2 (LF), 3 (CRLF), 4 (ANY),
and 5 (ANYCRLF).])
if test "$enable_bsr_anycrlf" = "yes"; then

View File

@ -1,9 +1,9 @@
Building PCRE2 without using autotools
--------------------------------------
This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of
This document has been converted from the PCRE1 document, but is not yet
complete. I have removed a number of quite old sections about building in
various environments, as they applied only to PCRE1 and are probably out of
date.
@ -57,7 +57,7 @@ can skip ahead to the CMake section.
environment. In particular, you can alter the definition of the NEWLINE
macro to specify what character(s) you want to be interpreted as line
terminators.
When you compile any of the PCRE2 modules, you must specify
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
sources.
@ -100,7 +100,7 @@ can skip ahead to the CMake section.
pcre2_chartables.c
pcre2_compile.c
pcre2_config.c
pcre2_context.c
pcre2_context.c
pcre2_dfa_match.c
pcre2_error.c
pcre2_jit_compile.c
@ -114,7 +114,7 @@ can skip ahead to the CMake section.
pcre2_pattern_info.c
pcre2_string_utils.c
pcre2_study.c
pcre2_substring.c
pcre2_substring.c
pcre2_tables.c
pcre2_ucd.c
pcre2_valid_utf.c
@ -138,8 +138,8 @@ can skip ahead to the CMake section.
(6) If you want to build a 16-bit library or 32-bit library (as well as, or
instead of the 8-bit library) just supply 16 or 32 as the value of
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
-DPCRE2_CODE_UNIT_WIDTH when you are compiling.
(7) If you want to build the POSIX wrapper functions (which apply only to the
8-bit library), ensure that you have the pcre2posix.h file and then
compile pcre2posix.c. Link the result (on its own) as the pcre2posix
@ -295,7 +295,7 @@ Borland, Msys, MinGW, NMake, and Unix. If possible, use short paths with no
spaces in the names for your CMake installation and your PCRE2 source and build
directories.
The following instructions were contributed by a PCRE1 user, but they should
The following instructions were contributed by a PCRE1 user, but they should
also work for PCRE2. If they are not followed exactly, errors may occur. In the
event that errors do occur, it is recommended that you delete the CMake cache
before attempting to repeat the CMake build process. In the CMake GUI, the
@ -394,9 +394,9 @@ required. For details, please see this web site:
There is also a mirror here:
http://www.vsoft-software.com/downloads.html
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
course.
==========================
Last Updated: 28 September 2014

View File

@ -1,7 +1,7 @@
README file for PCRE2 (Perl-compatible regular expression library)
------------------------------------------------------------------
PCRE2 is a re-implementation of the original PCRE library with an entirely new
PCRE2 is a re-implementation of the original PCRE library with an entirely new
API. The latest release of PCRE2 is always available in three alternative
formats from:
@ -11,7 +11,7 @@ FIXME: THIS WILL NOT BE THE CASE UNTIL THERE IS A FORMAL RELEASE.
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.tar.bz2
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre2/pcre2-xxx.zip
There is a mailing list for discussion about the development of PCRE (both the
There is a mailing list for discussion about the development of PCRE (both the
original and new APIs) at pcre-dev@exim.org. You can access the archives and
subscribe or manage your subscription here:
@ -41,7 +41,7 @@ The PCRE2 APIs
PCRE2 is written in C, and it has its own API. There are three sets of
functions, one for the 8-bit library, which processes strings of bytes, one for
the 16-bit library, which processes strings of 16-bit values, and one for the
32-bit library, which processes strings of 32-bit values. As this is a new API,
32-bit library, which processes strings of 32-bit values. As this is a new API,
there are as yet no C++ wrappers.
The distribution does contain a set of C wrapper functions for the 8-bit
@ -102,7 +102,7 @@ NON-AUTOTOOLS-BUILD, though if your system supports the use of "configure" and
"make" you may be able to build PCRE2 using autotools in the same way as for
many Unix-like systems.
PCRE2 can also be configured using CMake, which can be run in various ways
PCRE2 can also be configured using CMake, which can be run in various ways
(command line, GUI, etc). This creates Makefiles, solution files, etc. The file
NON-AUTOTOOLS-BUILD has information about CMake.
@ -186,13 +186,13 @@ library. They are also documented in the pcre2build man page.
handling UTF-8, UTF-16 and UTF-32 is not included. It is not possible to
configure one library with UTF support and the other without in the same
configuration.
Even when --enable-unicode is included, the use of a UTF encoding still has
to be enabled by an option at run time. When PCRE2 is compiled with this
option, its input can only either be ASCII or UTF-8/16/32, even when running
on EBCDIC platforms. It is not possible to use both --enable-unicode and
--enable-ebcdic at the same time.
When --enable-unicode is specified, as well as supporting UTF strings, PCRE2
includes support for the \P, \p, and \X sequences that recognize Unicode
character properties. However, only the basic two-letter properties such as
@ -248,7 +248,7 @@ library. They are also documented in the pcre2build man page.
cause programs to crash in strange ways. There is a discussion about stack
sizes in the pcre2stack man page.
. In the 8-bit library, the default maximum compiled pattern size is around
. In the 8-bit library, the default maximum compiled pattern size is around
64K. You can increase this by adding --with-link-size=3 to the "configure"
command. PCRE2 then uses three bytes instead of two for offsets to different
parts of the compiled pattern. In the 16-bit library, --with-link-size=3 is
@ -360,7 +360,7 @@ The "configure" script builds the following files for the basic C library:
. src/pcre2.h the public PCRE2 header file
. pcre2-config script that shows the building settings such as CFLAGS
that were set for "configure"
. libpcre2-8.pc )
. libpcre2-8.pc )
. libpcre2-16.pc ) data for the pkg-config command
. libpcre2-32.pc )
. libpcre2-posix.pc )
@ -452,7 +452,7 @@ prints the version number, and
outputs information about where the 8-bit library is installed. This command
can be included in makefiles for programs that use PCRE2, saving the programmer
from having to remember too many details. Run pcre2-config with no arguments to
from having to remember too many details. Run pcre2-config with no arguments to
obtain a list of possible arguments.
The pkg-config command is another system for saving and retrieving information
@ -593,7 +593,7 @@ bug in PCRE2.
The third set of tests checks pcre2_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the
default tables. The script uses the "locale" command to check for the
default tables. The script uses the "locale" command to check for the
availability of the "fr_FR", "french", or "fr" locale, and uses the first one
that it finds. If the "locale" command fails, or if its output doesn't include
"fr_FR", "french", or "fr" in the list of available locales, the third test
@ -609,7 +609,7 @@ of the French locale have been encountered. The test passes if its output
matches any one of them.
The fourth and fifth tests check UTF and Unicode property support, the fourth
being compatible with the perltest.pl script, and the fifth checking
being compatible with the perltest.pl script, and the fifth checking
PCRE2-specific things.
The sixth and seventh tests check the pcre2_dfa_match() alternative matching
@ -623,8 +623,8 @@ change) and when Unicode support is enabled.
The ninth and tenth tests are run only in 8-bit mode, and the eleventh and
twelfth tests are run only in 16-bit and 32-bit modes. These are tests that
generate different output in 8-bit mode. Each pair are for general cases and
Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
Unicode support, respectively. The thirteenth test checks the handling of
non-UTF characters greater than 255 by pcre2_dfa_match() in 16-bit and 32-bit
modes.
The fourteenth test is run only when JIT support is not available, and the
@ -632,8 +632,8 @@ fifteenth test is run only when JIT support is available. They test some
JIT-specific features such as information output from pcre2test about JIT
compilation.
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support,
The sixteenth and seventeenth tests are run only in 8-bit mode. They check the
POSIX interface to the 8-bit library, without and with Unicode support,
respectively.
@ -692,9 +692,9 @@ will cause PCRE2 to malfunction.
File manifest
-------------
The distribution should contain the files listed below.
The distribution should contain the files listed below.
(A) Source files for the PCRE2 library functions and their headers are found in
(A) Source files for the PCRE2 library functions and their headers are found in
the src directory:
src/dftables.c auxiliary program for building pcre2_chartables.c
@ -705,25 +705,25 @@ The distribution should contain the files listed below.
specified, used by copying to pcre2_chartables.c
src/pcre2posix.c )
src/pcre2_auto_possess.c )
src/pcre2_auto_possess.c )
src/pcre2_compile.c )
src/pcre2_config.c )
src/pcre2_context.c )
src/pcre2_context.c )
src/pcre2_dfa_match.c )
src/pcre2_error.c )
src/pcre2_error.c )
src/pcre2_exec.c )
src/pcre2_jit_compile.c )
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_match.c ) sources for the functions in the library,
src/pcre2_jit_misc.c ) and some internal functions that they use
src/pcre2_maketables.c )
src/pcre2_match.c )
src/pcre2_match_data.c )
src/pcre2_match_data.c )
src/pcre2_newline.c )
src/pcre2_ord2utf.c )
src/pcre2_pattern_info.c )
src/pcre2_string_utils.c )
src/pcre2_study.c )
src/pcre2_substring.c )
src/pcre2_substring.c )
src/pcre2_tables.c )
src/pcre2_ucd.c )
src/pcre2_valid_utf.c )

View File

@ -1,10 +1,10 @@
<html>
<!-- This is a manually maintained file that is the root of the HTML version of
the PCRE2 documentation. When the HTML documents are built from the man
page versions, the entire doc/html directory is emptied, this file is then
copied into doc/html/index.html, and the remaining files therein are
<!-- This is a manually maintained file that is the root of the HTML version of
the PCRE2 documentation. When the HTML documents are built from the man
page versions, the entire doc/html directory is emptied, this file is then
copied into doc/html/index.html, and the remaining files therein are
created by the 132html script.
-->
-->
<head>
<title>PCRE2 specification</title>
</head>
@ -87,7 +87,7 @@ in the library. There is a single page for each triple of 8-bit/16-bit/32-bit
functions.
</p>
<table>
<table>
<tr><td><a href="pcre2_assign_jit_stack.html">pcre2_assign_jit_stack</a></td>
<td>&nbsp;&nbsp;Assign stack for JIT matching</td></tr>
@ -153,7 +153,7 @@ functions.
<tr><td><a href="pcre2_maketables.html">pcre2_maketables</a></td>
<td>&nbsp;&nbsp;Build character tables in current locale</td></tr>
<tr><td><a href="pcre2_pattern_to_host_byte_order.html">pcre2_pattern_to_host_byte_order</a></td>
<td>&nbsp;&nbsp;Convert compiled pattern to host byte order if necessary</td></tr>

View File

@ -43,11 +43,11 @@ of Unicode in use can be discovered by running
</PRE>
</P>
<P>
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, <b>pcre2_compile_8()</b>). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
<b>pcre2_compile()</b>, and the documentation is written assuming that this is
the case.
</P>
<P>

View File

@ -306,7 +306,7 @@ unknown should also use the real function names. (Unfortunately, it is not
possible in C code to save and restore the value of a macro.)
</P>
<P>
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
If PCRE2_CODE_UNIT_WIDTH is not defined before including <b>pcre2.h</b>, a
compiler error occurs.
</P>
<P>
@ -443,7 +443,7 @@ below.
</P>
<P>
The choice of newline convention does not affect the interpretation of
the \n or \r escape sequences, nor does it affect what \R matches, which has
the \n or \r escape sequences, nor does it affect what \R matches, which has
its own separate control.
</P>
<br><a name="SEC12" href="#TOC1">MULTITHREADING</a><br>
@ -553,7 +553,7 @@ The memory used for a general context should be freed by calling:
The compile context
</b><br>
<P>
A compile context is required if you want to change the default values of any
A compile context is required if you want to change the default values of any
of the following compile-time parameters:
<pre>
What \R matches (Unicode newlines or CR, LF, CRLF only);
@ -562,7 +562,7 @@ of the following compile-time parameters:
The compile time nested parentheses limit;
An external function for stack checking.
</pre>
A compile context is also required if you are using custom memory management.
A compile context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
<i>pcre2_compile()</i>.
</P>
@ -579,33 +579,33 @@ A compile context is created, copied, and freed by the following functions:
<b>void pcre2_compile_context_free(pcre2_compile_context *<i>ccontext</i>);</b>
<br>
<br>
A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected.
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled;
The value must be PCRE2_BSR_ANYCRLF, to specify that \R matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled;
it is just saved with the compiled pattern. The value is used by the JIT
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
compiler and by the two interpreted matching functions, <i>pcre2_match()</i> and
<i>pcre2_dfa_match()</i>.
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
<b> const unsigned char *<i>tables</i>);</b>
<br>
<br>
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
argument is a general context. This function builds a set of character tables
in the current locale.
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
This specifies which characters or character sequences are to be recognized as
This specifies which characters or character sequences are to be recognized as
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
</P>
@ -627,7 +627,7 @@ using up too much system stack when being compiled.
<br>
<br>
There is at least one application that runs PCRE2 in threads with very limited
system stack, where running out of stack is to be avoided at all costs. The
system stack, where running out of stack is to be avoided at all costs. The
parenthesis limit above cannot take account of how much stack is actually
available. For a finer control, you can supply a function that is called
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
@ -638,20 +638,20 @@ function should return zero if all is well, or non-zero to force an error.
The match context
</b><br>
<P>
A match context is required if you want to change the default values of any
A match context is required if you want to change the default values of any
of the following match-time parameters:
<pre>
What \R matches (Unicode newlines or CR, LF, CRLF only);
A callout function;
The limit for calling <i>match()</i>;
The limit for calling <i>match()</i>;
The limit for calling <i>match()</i> recursively;
The newline character sequence;
</pre>
A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
<b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or <b>pcre2_jit_match()</b>.
Changing the newline value or what \R matches at match time disables the use
of JIT via <b>pcre2_match()</b>.
Changing the newline value or what \R matches at match time disables the use
of JIT via <b>pcre2_match()</b>.
</P>
<P>
A match context is created, copied, and freed by the following functions:
@ -666,8 +666,8 @@ A match context is created, copied, and freed by the following functions:
<b>void pcre2_match_context_free(pcre2_match_context *<i>mcontext</i>);</b>
<br>
<br>
A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected.
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
@ -693,7 +693,7 @@ calls repeatedly (sometimes recursively). The limit set by <i>match_limit</i> is
imposed on the number of times this function is called during a match, which
has the effect of limiting the amount of backtracking that can take place. For
patterns that are not anchored, the count restarts from zero for each position
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
in the subject string. This limit is not relevant to <b>pcre2_dfa_match()</b>,
which ignores it.
</P>
<P>
@ -730,7 +730,7 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
Limiting the recursion depth limits the amount of system stack that can be
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
stack, the amount of heap memory that can be used. This limit is not relevant,
and is ignored, when matching is done using JIT compiled code or by the
and is ignored, when matching is done using JIT compiled code or by the
<b>pcre2_dfa_match()</b> function.
</P>
<P>
@ -751,9 +751,9 @@ limit is set, less than the default.
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
<br>
<br>
This function sets up two additional custom memory management functions for use
This function sets up two additional custom memory management functions for use
by <b>pcre2_match()</b> when PCRE2 is compiled to use the heap for remembering
backtracking data, instead of recursive function calls that use the system
backtracking data, instead of recursive function calls that use the system
stack. There is a discussion about PCRE2's stack usage in the
<a href="pcre2stack.html"><b>pcre2stack</b></a>
documentation. See the
@ -765,7 +765,7 @@ limited stacks. Because of the greater use of memory management,
general custom memory functions are provided so that special-purpose external
code can be used for this case, because the memory blocks are all the same
size. The blocks are retained by <b>pcre2_match()</b> until it is about to exit
so that they can be re-used when possible during the match. In the absence of
so that they can be re-used when possible during the match. In the absence of
these functions, the normal custom memory management functions are used, if
supplied, otherwise the system functions.
</P>
@ -785,7 +785,7 @@ required. The second argument is a pointer to memory into which the information
is placed. If NULL is passed, the function returns the amount of memory that is
needed for the requested information. For calls that return numerical values,
the value is in bytes; when requesting these values, <i>where</i> should point
to appropriately aligned memory. For calls that return strings, the required
to appropriately aligned memory. For calls that return strings, the required
length is given in code units, not counting the terminating zero.
</P>
<P>
@ -809,7 +809,7 @@ compiling is available; otherwise it is set to zero.
PCRE2_CONFIG_JITTARGET
</pre>
The <i>where</i> argument should point to a buffer that is at least 48 code
units long. (The exact length needed can be found by calling
units long. (The exact length needed can be found by calling
<b>pcre2_config()</b> with <b>where</b> set to NULL.) The buffer is filled with a
string that contains the name of the architecture for which the JIT compiler is
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
@ -820,9 +820,9 @@ the string, in code units, is returned.
</pre>
The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. When PCRE2 is configured, the value
can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
internal linkages always use 4 bytes, so the configured value is not relevant.
</P>
<P>
@ -908,16 +908,16 @@ units) is returned.
<b>pcre2_code_free(pcre2_code *<i>code</i>);</b>
</P>
<P>
This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling <b>pcre2_code_free()</b> when
it is no longer needed.
</P>
<P>
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
If the compile context argument <i>ccontext</i> is NULL, the memory is obtained
by calling <b>malloc()</b>. Otherwise, it is obtained from the same memory
function that was used for the compile context.
</P>
<P>
@ -927,7 +927,7 @@ options are described below. Some of them (in particular, those that are
compatible with Perl, but some others as well) can also be set and unset from
within the pattern (see the detailed description in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
documentation).
documentation).
</P>
<P>
For those options that can be different in different parts of the pattern, the
@ -936,7 +936,7 @@ compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
the time of matching as well as at compile time.
</P>
<P>
Other, less frequently required compile-time parameters (for example, the
Other, less frequently required compile-time parameters (for example, the
newline setting) can be provided in a compile context (as described
<a href="#compilecontext">above).</a>
</P>
@ -962,10 +962,10 @@ This code fragment shows a typical straightforward call to
<pre>
pcre2_code *re;
PCRE2_SIZE erroffset;
int errorcode;
int errorcode;
re = pcre2_compile(
"^A.*Z", /* the pattern */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
0, /* default options */
&errorcode, /* for error code */
&erroffset, /* for error offset */
@ -984,14 +984,14 @@ Perl.
<pre>
PCRE2_ALLOW_EMPTY_CLASS
</pre>
By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match.
By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match.
<pre>
PCRE2_ALT_BSUX
</pre>
This option requests alternative handling of three escape sequences, which
This option requests alternative handling of three escape sequences, which
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
</P>
<P>
@ -1023,7 +1023,7 @@ documentation.
</pre>
If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting.
changed within a pattern by a (?i) option setting.
<pre>
PCRE2_DOLLAR_ENDONLY
</pre>
@ -1076,7 +1076,7 @@ Which characters are interpreted as newlines can be specified by a setting in
the compile context that is passed to <b>pcre2_compile()</b> or by a special
sequence at the start of the pattern, as described in the section entitled
<a href="pcre2pattern.html#newlines">"Newline conventions"</a>
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
in the <b>pcre2pattern</b> documentation. A default is defined when PCRE2 is
built.
<pre>
PCRE2_FIRSTLINE
@ -1091,7 +1091,7 @@ If this option is set, a back reference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
JavaScript).
<pre>
PCRE2_MULTILINE
@ -1116,10 +1116,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
PCRE2_NEVER_UCP
</pre>
This option locks out the use of Unicode properties for handling \B, \b, \D,
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The
\d, \S, \s, \W, \w, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
<pre>
PCRE2_NEVER_UTF
@ -1195,7 +1195,7 @@ pattern
(*MARK:A)(X|Y)
</pre>
The minimum length for a match is one character. If the subject is "ABC", there
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
string at the end of the subject does not take place, because PCRE2 knows that
the subject is now too short, and so the (*MARK) is never encountered. In this
case, the optimization does not affect the overall match result, which is still
@ -1211,7 +1211,7 @@ and
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
document.
document.
If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a negative
error code.
</P>
@ -1391,9 +1391,9 @@ The possible values for the second argument are defined in <b>pcre2.h</b>, and
are as follows:
<pre>
PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS
PCRE2_INFO_ARGOPTIONS
</pre>
Return a copy of the pattern's options. The third argument should point to a
Return a copy of the pattern's options. The third argument should point to a
<b>uint32_t</b> variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to <b>pcre2_compile()</b>, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level option settings at the start
@ -1411,7 +1411,7 @@ alternatives begin with one of the following:
\G always
.* if PCRE2_DOTALL is set and there are no back references to the subpattern in which .* appears
</pre>
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
PCRE2_INFO_ALLOPTIONS.
<pre>
PCRE2_INFO_BACKREFMAX
@ -1499,7 +1499,7 @@ return zero. The third argument should point to a <b>size_t</b> variable.
</pre>
Returns 1 if there is a rightmost literal code unit that must exist in any
matched string, other than at its start. The third argument should point to an
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
<b>uint32_t</b> variable. If there is no such value, 0 is returned. When 1 is
returned, the code unit value itself can be retrieved using
PCRE2_INFO_LASTCODEUNIT.
</P>
@ -1657,11 +1657,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
</P>
<P>
Information about successful and unsuccessful matches is placed in a match
Information about successful and unsuccessful matches is placed in a match
data block, which is an opaque structure that is accessed by function calls. In
particular, the match data block contains a vector of offsets into the subject
string that define the matched part of the subject and any substrings that were
captured. This is known as the <i>ovector</i>.
captured. This is known as the <i>ovector</i>.
</P>
<P>
Before calling <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b> you must create a
@ -1676,12 +1676,12 @@ return the overall matched string.
</P>
<P>
For <b>pcre2_match_data_create_from_pattern()</b>, the first argument is a
pointer to a compiled pattern. In this case the ovector is created to be
pointer to a compiled pattern. In this case the ovector is created to be
exactly the right size to hold all the substrings a pattern might capture.
</P>
<P>
The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the
The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the
match data block. If you are not using custom memory management, pass NULL.
</P>
<P>
@ -1728,8 +1728,8 @@ Here is an example of a simple call to <b>pcre2_match()</b>:
match_data, /* the match data block */
NULL); /* a match context; NULL means use defaults */
</pre>
If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
matching parameters are to be changed. For details, see the section on
<a href="#matchcontext">the match context</a>
above.
@ -1742,7 +1742,7 @@ The subject string is passed to <b>pcre2_match()</b> as a pointer in
<i>subject</i>, a length in <i>length</i>, and a starting offset in
<i>startoffset</i>. The length and offset are in code units, not characters.
That is, they are in bytes for the 8-bit library, 16-bit code units for the
16-bit library, and 32-bit code units for the 32-bit library, whether or not
16-bit library, and 32-bit code units for the 32-bit library, whether or not
UTF processing is enabled.
</P>
<P>
@ -1752,7 +1752,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the
pattern string, the subject may contain binary zeroes.
pattern string, the subject may contain binary zeroes.
</P>
<P>
A non-zero starting offset is useful when searching for another match in the
@ -1814,7 +1814,7 @@ JIT matching is disabled and the normal interpretive code in
The PCRE2_ANCHORED option limits <b>pcre2_match()</b> to matching at the first
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT
matching time. Note that setting the option at match time disables JIT
matching.
<pre>
PCRE2_NOTBOL
@ -1867,14 +1867,14 @@ and
<a href="pcre2unicode.html#utf32strings">UTF-32 strings</a>
in the
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
page.
page.
</P>
<P>
If you know that your subject is valid, and you want to skip these checks for
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
<b>pcre2_match()</b>. You might want to do this for the second and subsequent
calls to <b>pcre2_match()</b> if you are making repeated calls to find all the
matches in a single subject string.
matches in a single subject string.
</P>
<P>
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
@ -1908,9 +1908,9 @@ documentation.
</P>
<br><a name="SEC22" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
<P>
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
either a
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
either a
<a href="#compilecontext">compile context</a>
or a
<a href="#matchcontext">match context.</a>
@ -1953,7 +1953,7 @@ valid newline sequence and explicit \r or \n escapes appear in the pattern.
</P>
<P>
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by
addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
book, this is called "capturing" in what follows, and the phrase "capturing
subpattern" is used for a fragment of a pattern that picks out a substring.
@ -1964,11 +1964,11 @@ pattern.
</P>
<P>
The overall matched string and any captured substrings are returned to the
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
caller via a vector of PCRE2_SIZE values, called the <b>ovector</b>. This is
contained within the
<a href="#matchdatablock">match data block.</a>
You can obtain direct access to the ovector by calling
<b>pcre2_get_ovector_pointer()</b> to find its address, and
You can obtain direct access to the ovector by calling
<b>pcre2_get_ovector_pointer()</b> to find its address, and
<b>pcre2_get_ovector_count()</b> to find the number of pairs of values it
contains. Alternatively, you can use the auxiliary functions for accessing
captured substrings
@ -2044,26 +2044,26 @@ Other information about the match
<b>PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *<i>match_data</i>);</b>
</P>
<P>
In addition to the offsets in the ovector, other information about a match is
In addition to the offsets in the ovector, other information about a match is
retained in the match data block and can be retrieved by the above functions.
</P>
<P>
When a (*MARK) name is to be passed back, <b>pcre2_get_mark()</b> returns a
pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed
pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed
match or a partial match, as well as after a successful one.
</P>
<P>
The offset of the character at which the successful match started is
returned by <b>pcre2_get_startchar()</b>. This can be different to the value of
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
<i>ovector[0]</i> if the pattern contains the \K escape sequence. Note,
however, the \K has no effect for a partial match.
<a name="errorlist"></a></P>
<br><b>
Error return values from <b>pcre2_match()</b>
</b><br>
<P>
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
error codes are also returned by other functions, and are documented with them.
The codes are given names in the header file. If UTF checking is in force and
@ -2205,7 +2205,7 @@ argument is a pointer to the match data block, the second is the group number,
and the third is a pointer to a variable into which the length is placed.
</P>
<P>
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
new memory, obtained using the same memory allocation function that was used
for the match data block. The first two arguments of these functions are a
@ -2220,10 +2220,10 @@ This is updated to contain the actual number of code units used, excluding the
terminating zero.
</P>
<P>
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by
For <b>pcre2_substring_get_bynumber()</b> the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by
calling <b>pcre2_substring_free()</b>.
</P>
<P>
@ -2237,9 +2237,9 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
<pre>
PCRE2_ERROR_NOSUBSTRING
</pre>
No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too
No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too
small to capture that group.
</P>
<br><a name="SEC25" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
@ -2253,7 +2253,7 @@ small to capture that group.
<P>
The <b>pcre2_substring_list_get()</b> function extracts all available substrings
and builds a list of pointers to them, and a second list that contains their
lengths (in code units), excluding a terminating zero that is added to each of
lengths (in code units), excluding a terminating zero that is added to each of
them. All this is done in a single block of memory that is obtained using the
same memory allocation function that was used to get the match data block.
</P>
@ -2265,7 +2265,7 @@ NULL pointer. The address of the list of lengths is returned via
therefore need the lengths, you may supply NULL as the <b>lengthsptr</b>
argument to disable the creation of a list of lengths. The yield of the
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
could not be obtained. When the list is no longer needed, it should be freed by
could not be obtained. When the list is no longer needed, it should be freed by
calling <b>pcre2_substring_list_free()</b>.
</P>
<P>
@ -2312,7 +2312,7 @@ name.
<P>
Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also
"byname" functions that correspond to the "bynumber" functions, the only
"byname" functions that correspond to the "bynumber" functions, the only
difference being that the second argument is a name instead of a number.
However, if PCRE2_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section).
@ -2375,7 +2375,7 @@ numbers, and hence the captured data.
<P>
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match at a given
want to find all possible matches, or the longest possible match at a given
position, consider using the alternative matching function (see below) instead.
If you cannot use the alternative function, you can kludge it up by making use
of the callout facility, which is described in the
@ -2566,8 +2566,8 @@ fail, this error is given.
</P>
<br><a name="SEC30" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
<b>pcre2build</b>(3), <b>pcre2libs</b>(3), <b>pcre2callout</b>(3),
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
<b>pcre2demo</b>(3), <b>pcre2sample</b>(3), <b>pcre2stack</b>(3).
</P>
<br><a name="SEC31" href="#TOC1">AUTHOR</a><br>

View File

@ -88,11 +88,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
libraries, called <b>libpcre2-16</b> and <b>libpcre2-32</b>, which process
strings that are contained in vectors of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters or
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
the following to the <b>configure</b> command:
<pre>
--enable-pcre16
--enable-pcre32
--enable-pcre32
</pre>
If you do not want the 8-bit library, add
<pre>
@ -358,7 +358,7 @@ override this value by specifying a run-time option.
If you add one of
<pre>
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
--enable-pcre2test-libedit
</pre>
to the <b>configure</b> command, <b>pcre2test</b> is linked with the
<b>libreadline</b> or <b>libedit</b> library, respectively, and when its input is
@ -376,8 +376,8 @@ unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
this:
<pre>
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
</pre>
If your environment has not been set up so that an appropriate library is

View File

@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system
@ -56,8 +56,8 @@ the following line. */
/* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&amp;errornumber, /* for error number */
&amp;erroroffset, /* for error offset */
@ -151,9 +151,9 @@ re = pcre2_compile(
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
@ -197,7 +197,7 @@ if (rc &lt; 0)
return 1;
}
/* Match succeeded. Get a pointer to the output vector, where string offsets are
/* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
@ -210,7 +210,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
* captured. *
*************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used
/* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */
if (rc == 0)
@ -261,7 +261,7 @@ if (namecount &lt;= 0) printf("No named substrings\n"); else
&amp;name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two
and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */
tabptr = name_table;
@ -306,7 +306,7 @@ if (namecount &lt;= 0) printf("No named substrings\n"); else
if (!find_all) /* Check for -g */
{
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */
}
@ -324,7 +324,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &amp;newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */

View File

@ -71,10 +71,10 @@ performance, there is also a "fast path" API that is JIT-specific.
</P>
<br><a name="SEC3" href="#TOC1">SIMPLE USE OF JIT</a><br>
<P>
To make use of the JIT support in the simplest way, all you have to do is to
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
<b>pcre2_compile()</b>. This function has two arguments: the first is the
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
To make use of the JIT support in the simplest way, all you have to do is to
call <b>pcre2_jit_compile()</b> after successfully compiling a pattern with
<b>pcre2_compile()</b>. This function has two arguments: the first is the
compiled pattern pointer that was returned by <b>pcre2_compile()</b>, and the
second is a set of option bits, which must include at least one of
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
</P>
@ -239,7 +239,7 @@ non-default JIT stacks might operate:
</pre>
All the functions described in this section do nothing if JIT is not available,
and <b>pcre2_jit_stack_assign()</b> does nothing unless the <b>code</b> argument
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
is non-NULL and points to a <b>pcre2_code</b> block that has been successfully
processed by <b>pcre2_jit_compile()</b>.
<a name="stackfaq"></a></P>
<br><a name="SEC7" href="#TOC1">JIT STACK FAQ</a><br>
@ -328,18 +328,18 @@ callback.
<pre>
int rc;
pcre2_code *re;
pcre2_match_data *match_data;
pcre2_match_data *match_data;
pcre2_jit_stack *jit_stack;
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
&amp;errornumber, &amp;erroffset, NULL);
/* Check for errors */
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
/* Check for errors */
/* Check for errors */
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
/* Check for error (NULL) */
pcre2_jit_stack_assign(re, NULL, jit_stack);
match_data = pcre2_match_data_create(re, 10);
match_data = pcre2_match_data_create(re, 10);
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
/* Check results */
pcre2_free(re);

View File

@ -89,15 +89,15 @@ empty string at the end of the subject.
</P>
<P>
When a partial match is returned, the first two elements in the ovector point
to the portion of the subject that was matched. The appearance of \K in the
to the portion of the subject that was matched. The appearance of \K in the
pattern has no effect for a partial match. Consider this pattern:
<pre>
/abc\K123/
</pre>
If it is matched against "456abc123xyz" the result is a complete match, and the
ovector defines the matched string as "123", because \K resets the "start of
match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all
ovector defines the matched string as "123", because \K resets the "start of
match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all
these characters are needed for a subsequent re-match with additional
characters.
</P>
@ -343,14 +343,14 @@ same point as before.
For example, if the pattern "(?&#60;=123)abc" is partially matched against the
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
lookbehind count is 3, so all characters before offset 2 can be discarded. The
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
displays a partial match, it indicates the lookbehind characters with '&#60;'
value of <b>startoffset</b> for the next match should be 3. When <b>pcre2test</b>
displays a partial match, it indicates the lookbehind characters with '&#60;'
characters:
<pre>
re&#62; "(?&#60;=123)abc"
data&#62; xx123ab\=ph
Partial match: 123ab
&#60;&#60;&#60;
&#60;&#60;&#60;
</PRE>
</P>
<P>

View File

@ -145,7 +145,7 @@ Unicode newline sequence. The
<a href="pcre2api.html"><b>pcre2api</b></a>
page has
<a href="pcre2api.html#newlines">further discussion</a>
about newlines, and shows how to set the newline convention when calling
about newlines, and shows how to set the newline convention when calling
<b>pcre2_compile()</b>.
</P>
<P>
@ -218,7 +218,7 @@ corresponding characters in the subject. As a trivial example, the pattern
</pre>
matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case.
independently of case.
</P>
<P>
The power of regular expressions comes from the ability to include alternatives
@ -1191,8 +1191,8 @@ An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default.
If a closing square bracket is required as a member of the class, it should be
the first data character in the class (after an initial circumflex, if present)
or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
square bracket at the start does end the (empty) class.
</P>
<P>
@ -1216,7 +1216,7 @@ string.
When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would.
caseful version would.
</P>
<P>
Characters that might indicate line breaks are never treated in any special way
@ -1341,7 +1341,7 @@ classes by other sequences, as follows:
[:alnum:] becomes \p{Xan}
[:alpha:] becomes \p{L}
[:blank:] becomes \h
[:cntrl:] becomes \p{Cc}
[:cntrl:] becomes \p{Cc}
[:digit:] becomes \p{Nd}
[:lower:] becomes \p{Ll}
[:space:] becomes \p{Xps}
@ -1490,7 +1490,7 @@ match "cataract", "erpillar" or an empty string.
<br>
2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, the portion of the subject string that matched the
subpattern is passed back to the caller, separately from the portion that
subpattern is passed back to the caller, separately from the portion that
matched the whole pattern. (This applies only to the traditional matching
function; the DFA matching function does not support capturing.)
</P>
@ -1908,7 +1908,7 @@ at release 5.10.
PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting
This feature can be disabled by the PCRE2_NO_AUTO_POSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
</P>
<P>
@ -2216,7 +2216,7 @@ if the pattern is written as
<pre>
^.*+(?&#60;=abcd)
</pre>
there can be no backtracking for the .*+ item because of the possessive
there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbehind
assertion does a single test on the last four characters. If it fails, the
match fails immediately. For long strings, this approach makes a significant
@ -2720,8 +2720,8 @@ same pair of parentheses when there is a repetition.
<P>
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE2 provides an external
function by putting its entry point in a match context using the function
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
function by putting its entry point in a match context using the function
<b>pcre2_set_callout()</b> and passing the context to <b>pcre2_match()</b> or
<b>pcre2_dfa_match()</b>. If no match context is passed, or if the callout entry
point is set to NULL, callouts are disabled.
</P>
@ -2961,7 +2961,7 @@ output from <b>pcre2test</b>:
re&#62; /(*COMMIT)abc/
data&#62; xyzabc
0: abc
data&#62;
data&#62;
re&#62; /(*COMMIT)abc/no_start_optimize
data&#62; xyzabc
No match
@ -2989,7 +2989,7 @@ as (*COMMIT).
<P>
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) or (*THEN).
<pre>
(*SKIP)
@ -3041,7 +3041,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
<P>
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) and (*THEN).
</P>
<P>

View File

@ -103,17 +103,17 @@ PCRE2 to use heap memory instead of stack for remembering back-up points when
of how to do this are given in the
<a href="pcre2build.html"><b>pcre2build</b></a>
documentation. When built in this way, instead of using the stack, PCRE2
gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system <b>malloc()</b> function, but you can arrange
to supply your own memory management function. For details, see the section
entitled
gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system <b>malloc()</b> function, but you can arrange
to supply your own memory management function. For details, see the section
entitled
<a href="pcre2api.html#matchcontext">"The match context"</a>
in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation. Since the block sizes are always the same, it may be possible to
implement a customized memory handler that is more efficient than the standard
function. The memory blocks obtained for this purpose are retained and re-used
if possible while <b>pcre2_match()</b> is running. They are all freed just
function. The memory blocks obtained for this purpose are retained and re-used
if possible while <b>pcre2_match()</b> is running. They are all freed just
before it exits.
</P>
<br><b>

View File

@ -414,7 +414,7 @@ appear.
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*UTF) set appropriate UTF mode for the library in use

View File

@ -476,7 +476,7 @@ about the pattern:
/I info show info about compiled pattern
hex pattern is coded in hexadecimal
jit[=&#60;number&#62;] use JIT
jitverify verify JIT use
jitverify verify JIT use
locale=&#60;name&#62; use this locale
memory show memory used
newline=&#60;type&#62; set newline type
@ -565,7 +565,7 @@ number in the range 0 to 7:
7 all three modes
</pre>
If no number is given, 7 is assumed. If JIT compilation is successful, the
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
compiled JIT code will automatically be used when <b>pcre2_match()</b> is run
for the appropriate type of match, except when incompatible run-time options
are specified. For more details, see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
@ -710,7 +710,7 @@ for a description of their effects.
partial_hard (or ph) set PCRE2_PARTIAL_HARD
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
</pre>
The partial matching modifiers are provided with abbreviations because they
The partial matching modifiers are provided with abbreviations because they
appear frequently in tests.
</P>
<P>
@ -892,8 +892,8 @@ until it finds the minimum values for each parameter that allow
<b>pcre2_match()</b> to complete without error.
</P>
<P>
If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a
If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a
warning message).
</P>
<P>
@ -939,10 +939,10 @@ appears, though of course it can also be used to set a default in a
available for storing matching information. The default is 15.
</P>
<P>
At least one pair of offsets is always created by
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
At least one pair of offsets is always created by
<b>pcre2_match_data_create()</b>, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes <b>regexec()</b> to be called with a NULL capture
vector.
</P>
<br><a name="SEC12" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>

View File

@ -67,7 +67,7 @@ In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit.
</P>
<P>
The escape sequence \C can be used to match a single code unit, in a UTF mode,
The escape sequence \C can be used to match a single code unit, in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \C in the
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
@ -114,8 +114,8 @@ VALIDITY OF UTF STRINGS
</b><br>
<P>
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given.
are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given.
</P>
<P>
UTF-16 and UTF-32 strings can indicate their endianness by special code known

View File

@ -23,11 +23,11 @@ of Unicode in use can be discovered by running
.sp
pcre2test -C
.P
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
The three libraries contain identical sets of functions, with names ending in
_8, _16, or _32, respectively (for example, \fBpcre2_compile_8()\fP). However,
by defining PCRE2_CODE_UNIT_WIDTH to be 8, 16, or 32, a program that uses just
one code unit width can be written using generic names such as
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
\fBpcre2_compile()\fP, and the documentation is written assuming that this is
the case.
.P
In addition to the Perl-compatible matching function, PCRE2 contains an

View File

@ -158,8 +158,8 @@ REVISION
Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2API(3) Library Functions Manual PCRE2API(3)
@ -2529,8 +2529,8 @@ REVISION
Last updated: 16 October 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
@ -2981,8 +2981,8 @@ REVISION
Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
@ -3217,8 +3217,8 @@ REVISION
Last updated: 19 October 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
@ -3403,8 +3403,8 @@ REVISION
Last updated: 28 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
@ -3758,8 +3758,8 @@ REVISION
Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
@ -3826,8 +3826,8 @@ REVISION
Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
@ -4045,8 +4045,8 @@ REVISION
Last updated: 29 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
@ -4485,8 +4485,8 @@ REVISION
Last updated: 14 October 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
@ -4711,5 +4711,5 @@ REVISION
Last updated: 16 September 2014
Copyright (c) 1997-2014 University of Cambridge.
------------------------------------------------------------------------------

View File

@ -250,7 +250,7 @@ to be included in an environment where the value of PCRE2_CODE_UNIT_WIDTH is
unknown should also use the real function names. (Unfortunately, it is not
possible in C code to save and restore the value of a macro.)
.P
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
If PCRE2_CODE_UNIT_WIDTH is not defined before including \fBpcre2.h\fP, a
compiler error occurs.
.P
When using multiple libraries in an application, you must take care when
@ -392,7 +392,7 @@ section on \fBpcre2_match()\fP options
below.
.P
The choice of newline convention does not affect the interpretation of
the \en or \er escape sequences, nor does it affect what \eR matches, which has
the \en or \er escape sequences, nor does it affect what \eR matches, which has
its own separate control.
.
.
@ -509,7 +509,7 @@ The memory used for a general context should be freed by calling:
.SS "The compile context"
.rs
.sp
A compile context is required if you want to change the default values of any
A compile context is required if you want to change the default values of any
of the following compile-time parameters:
.sp
What \eR matches (Unicode newlines or CR, LF, CRLF only);
@ -518,7 +518,7 @@ of the following compile-time parameters:
The compile time nested parentheses limit;
An external function for stack checking.
.sp
A compile context is also required if you are using custom memory management.
A compile context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
\fIpcre2_compile()\fP.
.P
@ -534,8 +534,8 @@ A compile context is created, copied, and freed by the following functions:
.B void pcre2_compile_context_free(pcre2_compile_context *\fIccontext\fP);
.fi
.sp
A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
A compile context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected.
.sp
.nf
@ -543,11 +543,11 @@ PCRE2_ERROR_BADDATA if invalid data is detected.
.B " uint32_t \fIvalue\fP);"
.fi
.sp
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled;
The value must be PCRE2_BSR_ANYCRLF, to specify that \eR matches only CR, LF,
or CRLF, or PCRE2_BSR_UNICODE, to specify that \eR matches any Unicode line
ending sequence. The value of this parameter does not affect what is compiled;
it is just saved with the compiled pattern. The value is used by the JIT
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
\fIpcre2_dfa_match()\fP.
.sp
.nf
@ -555,7 +555,7 @@ compiler and by the two interpreted matching functions, \fIpcre2_match()\fP and
.B " const unsigned char *\fItables\fP);"
.fi
.sp
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
The value must be the result of a call to \fIpcre2_maketables()\fP, whose only
argument is a general context. This function builds a set of character tables
in the current locale.
.sp
@ -564,9 +564,9 @@ in the current locale.
.B " uint32_t \fIvalue\fP);"
.fi
.sp
This specifies which characters or character sequences are to be recognized as
This specifies which characters or character sequences are to be recognized as
newlines. The value must be one of PCRE2_NEWLINE_CR (carriage return only),
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
PCRE2_NEWLINE_LF (linefeed only), PCRE2_NEWLINE_CRLF (the two-character
sequence CR followed by LF), PCRE2_NEWLINE_ANYCRLF (any of the above), or
PCRE2_NEWLINE_ANY (any Unicode newline sequence).
.P
@ -591,7 +591,7 @@ using up too much system stack when being compiled.
.fi
.sp
There is at least one application that runs PCRE2 in threads with very limited
system stack, where running out of stack is to be avoided at all costs. The
system stack, where running out of stack is to be avoided at all costs. The
parenthesis limit above cannot take account of how much stack is actually
available. For a finer control, you can supply a function that is called
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
@ -603,20 +603,20 @@ function should return zero if all is well, or non-zero to force an error.
.SS "The match context"
.rs
.sp
A match context is required if you want to change the default values of any
A match context is required if you want to change the default values of any
of the following match-time parameters:
.sp
What \eR matches (Unicode newlines or CR, LF, CRLF only);
A callout function;
The limit for calling \fImatch()\fP;
The limit for calling \fImatch()\fP;
The limit for calling \fImatch()\fP recursively;
The newline character sequence;
.sp
A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
A match context is also required if you are using custom memory management.
If none of these apply, just pass NULL as the context argument of
\fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP, or \fBpcre2_jit_match()\fP.
Changing the newline value or what \eR matches at match time disables the use
of JIT via \fBpcre2_match()\fP.
Changing the newline value or what \eR matches at match time disables the use
of JIT via \fBpcre2_match()\fP.
.P
A match context is created, copied, and freed by the following functions:
.sp
@ -630,8 +630,8 @@ A match context is created, copied, and freed by the following functions:
.B void pcre2_match_context_free(pcre2_match_context *\fImcontext\fP);
.fi
.sp
A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
A match context is created with default values for its parameters. These can
be changed by calling the following functions, which return 0 on success, or
PCRE2_ERROR_BADDATA if invalid data is detected.
.sp
.nf
@ -662,7 +662,7 @@ calls repeatedly (sometimes recursively). The limit set by \fImatch_limit\fP is
imposed on the number of times this function is called during a match, which
has the effect of limiting the amount of backtracking that can take place. For
patterns that are not anchored, the count restarts from zero for each position
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
in the subject string. This limit is not relevant to \fBpcre2_dfa_match()\fP,
which ignores it.
.P
When \fBpcre2_match()\fP is called with a pattern that was successfully studied
@ -698,7 +698,7 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP.
Limiting the recursion depth limits the amount of system stack that can be
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
stack, the amount of heap memory that can be used. This limit is not relevant,
and is ignored, when matching is done using JIT compiled code or by the
and is ignored, when matching is done using JIT compiled code or by the
\fBpcre2_dfa_match()\fP function.
.P
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
@ -720,9 +720,9 @@ limit is set, less than the default.
.B " void (*\fIprivate_free\fP)(void *, void *), void *\fImemory_data\fP);"
.fi
.sp
This function sets up two additional custom memory management functions for use
This function sets up two additional custom memory management functions for use
by \fBpcre2_match()\fP when PCRE2 is compiled to use the heap for remembering
backtracking data, instead of recursive function calls that use the system
backtracking data, instead of recursive function calls that use the system
stack. There is a discussion about PCRE2's stack usage in the
.\" HREF
\fBpcre2stack\fP
@ -738,7 +738,7 @@ limited stacks. Because of the greater use of memory management,
general custom memory functions are provided so that special-purpose external
code can be used for this case, because the memory blocks are all the same
size. The blocks are retained by \fBpcre2_match()\fP until it is about to exit
so that they can be re-used when possible during the match. In the absence of
so that they can be re-used when possible during the match. In the absence of
these functions, the normal custom memory management functions are used, if
supplied, otherwise the system functions.
.
@ -760,7 +760,7 @@ required. The second argument is a pointer to memory into which the information
is placed. If NULL is passed, the function returns the amount of memory that is
needed for the requested information. For calls that return numerical values,
the value is in bytes; when requesting these values, \fIwhere\fP should point
to appropriately aligned memory. For calls that return strings, the required
to appropriately aligned memory. For calls that return strings, the required
length is given in code units, not counting the terminating zero.
.P
When requesting information, the returned value from \fBpcre2_config()\fP is
@ -783,7 +783,7 @@ compiling is available; otherwise it is set to zero.
PCRE2_CONFIG_JITTARGET
.sp
The \fIwhere\fP argument should point to a buffer that is at least 48 code
units long. (The exact length needed can be found by calling
units long. (The exact length needed can be found by calling
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) The buffer is filled with a
string that contains the name of the architecture for which the JIT compiler is
configured, for example "x86 32bit (little endian + unaligned)". If JIT support
@ -794,9 +794,9 @@ the string, in code units, is returned.
.sp
The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. When PCRE2 is configured, the value
can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
can be set to 2, 3, or 4, with the default being 2. This is the value that is
returned by \fBpcre2_config()\fP. However, when the 16-bit library is compiled,
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
internal linkages always use 4 bytes, so the configured value is not relevant.
.P
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
@ -820,7 +820,7 @@ that is recognized as meaning "newline". The values are:
3 Carriage return, linefeed (CRLF)
4 Any Unicode line ending
5 Any of CR, LF, or CRLF
.sp
.sp
The default should normally correspond to the standard sequence for your
operating system.
.sp
@ -849,7 +849,7 @@ compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
heap instead of recursive function calls.
.sp
PCRE2_CONFIG_UNICODE_VERSION
.sp
.sp
The \fIwhere\fP argument should point to a buffer that is at least 24 code
units long. (The exact length needed can be found by calling
\fBpcre2_config()\fP with \fBwhere\fP set to NULL.) If PCRE2 has been compiled
@ -884,15 +884,15 @@ units) is returned.
.B pcre2_code_free(pcre2_code *\fIcode\fP);
.fi
.P
This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
This function compiles a pattern, defined by a pointer to a string of code
units and a length, into an internal form. If the pattern is zero-terminated,
the length should be specified as PCRE2_ZERO_TERMINATED. The function returns a
pointer to a block of memory that contains the compiled pattern and related
data. The caller must free the memory by calling \fBpcre2_code_free()\fP when
it is no longer needed.
.P
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
If the compile context argument \fIccontext\fP is NULL, the memory is obtained
by calling \fBmalloc()\fP. Otherwise, it is obtained from the same memory
function that was used for the compile context.
.P
The \fIoptions\fP argument contains various bit settings that affect the
@ -903,14 +903,14 @@ within the pattern (see the detailed description in the
.\" HREF
\fBpcre2pattern\fP
.\"
documentation).
documentation).
.P
For those options that can be different in different parts of the pattern, the
contents of the \fIoptions\fP argument specify their settings at the start of
compilation. The PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK options can be set at
the time of matching as well as at compile time.
.P
Other, less frequently required compile-time parameters (for example, the
Other, less frequently required compile-time parameters (for example, the
newline setting) can be provided in a compile context (as described
.\" HTML <a href="#compilecontext">
.\" </a>
@ -936,10 +936,10 @@ This code fragment shows a typical straightforward call to
.sp
pcre2_code *re;
PCRE2_SIZE erroffset;
int errorcode;
int errorcode;
re = pcre2_compile(
"^A.*Z", /* the pattern */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
PCRE2_ZERO_TERMINATED, /* the pattern is zero-terminated */
0, /* default options */
&errorcode, /* for error code */
&erroffset, /* for error offset */
@ -958,14 +958,14 @@ Perl.
.sp
PCRE2_ALLOW_EMPTY_CLASS
.sp
By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match.
By default, for compatibility with Perl, a closing square bracket that
immediately follows an opening one is treated as a data character for the
class. When PCRE2_ALLOW_EMPTY_CLASS is set, it terminates the class, which
therefore contains no characters and so can never match.
.sp
PCRE2_ALT_BSUX
.sp
This option requests alternative handling of three escape sequences, which
This option requests alternative handling of three escape sequences, which
makes PCRE2's behaviour more like ECMAscript (aka JavaScript). When it is set:
.P
(1) \eU matches an upper case "U" character; by default \eU causes a compile
@ -996,7 +996,7 @@ documentation.
.sp
If this bit is set, letters in the pattern match both upper and lower case
letters in the subject. It is equivalent to Perl's /i option, and it can be
changed within a pattern by a (?i) option setting.
changed within a pattern by a (?i) option setting.
.sp
PCRE2_DOLLAR_ENDONLY
.sp
@ -1052,7 +1052,7 @@ sequence at the start of the pattern, as described in the section entitled
.\" </a>
"Newline conventions"
.\"
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
in the \fBpcre2pattern\fP documentation. A default is defined when PCRE2 is
built.
.sp
PCRE2_FIRSTLINE
@ -1067,7 +1067,7 @@ If this option is set, a back reference to an unset subpattern group matches an
empty string (by default this causes the current matching alternative to fail).
A pattern such as (\e1)(a) succeeds when this option is set (assuming it can
find an "a" in the subject), whereas it fails by default, for Perl
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
compatibility. Setting this option makes PCRE2 behave more like ECMAscript (aka
JavaScript).
.sp
PCRE2_MULTILINE
@ -1091,10 +1091,10 @@ occurrences of ^ or $ in a pattern, setting PCRE2_MULTILINE has no effect.
PCRE2_NEVER_UCP
.sp
This option locks out the use of Unicode properties for handling \eB, \eb, \eD,
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The
\ed, \eS, \es, \eW, \ew, and some of the POSIX character classes, as described
for the PCRE2_UCP option below. In particular, it prevents the creator of the
pattern from enabling this facility by starting the pattern with (*UCP). This
may be useful in applications that process patterns from external sources. The
option combination PCRE2_UCP and PCRE2_NEVER_UCP causes an error.
.sp
PCRE2_NEVER_UTF
@ -1167,7 +1167,7 @@ pattern
(*MARK:A)(X|Y)
.sp
The minimum length for a match is one character. If the subject is "ABC", there
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
will be attempts to match "ABC", "BC", and "C". An attempt to match an empty
string at the end of the subject does not take place, because PCRE2 knows that
the subject is now too short, and so the (*MARK) is never encountered. In this
case, the optimization does not affect the overall match result, which is still
@ -1194,7 +1194,7 @@ in the
.\" HREF
\fBpcre2unicode\fP
.\"
document.
document.
If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a negative
error code.
.P
@ -1385,9 +1385,9 @@ The possible values for the second argument are defined in \fBpcre2.h\fP, and
are as follows:
.sp
PCRE2_INFO_ALLOPTIONS
PCRE2_INFO_ARGOPTIONS
PCRE2_INFO_ARGOPTIONS
.sp
Return a copy of the pattern's options. The third argument should point to a
Return a copy of the pattern's options. The third argument should point to a
\fBuint32_t\fP variable. PCRE2_INFO_ARGOPTIONS returns exactly the options that
were passed to \fBpcre2_compile()\fP, whereas PCRE2_INFO_ALLOPTIONS returns
the compile options as modified by any top-level option settings at the start
@ -1406,7 +1406,7 @@ alternatives begin with one of the following:
.* if PCRE2_DOTALL is set and there are no back
references to the subpattern in which .* appears
.sp
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
For such patterns, the PCRE2_ANCHORED bit is set in the options returned for
PCRE2_INFO_ALLOPTIONS.
.sp
PCRE2_INFO_BACKREFMAX
@ -1490,7 +1490,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable.
.sp
Returns 1 if there is a rightmost literal code unit that must exist in any
matched string, other than at its start. The third argument should point to an
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
\fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is
returned, the code unit value itself can be retrieved using
PCRE2_INFO_LASTCODEUNIT.
.P
@ -1617,7 +1617,7 @@ values are:
3 Carriage return, linefeed (CRLF)
4 Any Unicode line ending
5 Any of CR, LF, or CRLF
.sp
.sp
The default can be overridden when a pattern is matched.
.sp
PCRE2_INFO_RECURSIONLIMIT
@ -1652,11 +1652,11 @@ pattern with the JIT compiler does not alter the value returned by this option.
.B void pcre2_match_data_free(pcre2_match_data *\fImatch_data\fP);
.fi
.P
Information about successful and unsuccessful matches is placed in a match
Information about successful and unsuccessful matches is placed in a match
data block, which is an opaque structure that is accessed by function calls. In
particular, the match data block contains a vector of offsets into the subject
string that define the matched part of the subject and any substrings that were
captured. This is known as the \fIovector\fP.
captured. This is known as the \fIovector\fP.
.P
Before calling \fBpcre2_match()\fP or \fBpcre2_dfa_match()\fP you must create a
match data block by calling one of the creation functions above. For
@ -1669,11 +1669,11 @@ pair is imposed by \fBpcre2_match_data_create()\fP, so it is always possible to
return the overall matched string.
.P
For \fBpcre2_match_data_create_from_pattern()\fP, the first argument is a
pointer to a compiled pattern. In this case the ovector is created to be
pointer to a compiled pattern. In this case the ovector is created to be
exactly the right size to hold all the substrings a pattern might capture.
.P
The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the
The second argument of both these functions is a pointer to a general context,
which can specify custom memory management for obtaining the memory for the
match data block. If you are not using custom memory management, pass NULL.
.P
A match data block can be used many times, with the same or different compiled
@ -1729,8 +1729,8 @@ Here is an example of a simple call to \fBpcre2_match()\fP:
match_data, /* the match data block */
NULL); /* a match context; NULL means use defaults */
.sp
If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
If the subject string is zero-terminated, the length can be given as
PCRE2_ZERO_TERMINATED. A match context must be provided if certain less common
matching parameters are to be changed. For details, see the section on
.\" HTML <a href="#matchcontext">
.\" </a>
@ -1746,7 +1746,7 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in
\fIsubject\fP, a length in \fIlength\fP, and a starting offset in
\fIstartoffset\fP. The length and offset are in code units, not characters.
That is, they are in bytes for the 8-bit library, 16-bit code units for the
16-bit library, and 32-bit code units for the 32-bit library, whether or not
16-bit library, and 32-bit code units for the 32-bit library, whether or not
UTF processing is enabled.
.P
If \fIstartoffset\fP is greater than the length of the subject,
@ -1755,7 +1755,7 @@ zero, the search for a match starts at the beginning of the subject, and this
is by far the most common case. In UTF-8 or UTF-16 mode, the starting offset
must point to the start of a character, or to the end of the subject (in UTF-32
mode, one code unit equals one character, so all offsets are valid). Like the
pattern string, the subject may contain binary zeroes.
pattern string, the subject may contain binary zeroes.
.P
A non-zero starting offset is useful when searching for another match in the
same subject by calling \fBpcre2_match()\fP again after a previous success.
@ -1816,7 +1816,7 @@ JIT matching is disabled and the normal interpretive code in
The PCRE2_ANCHORED option limits \fBpcre2_match()\fP to matching at the first
matching position. If a pattern was compiled with PCRE2_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at
matching time. Note that setting the option at match time disables JIT
matching time. Note that setting the option at match time disables JIT
matching.
.sp
PCRE2_NOTBOL
@ -1880,13 +1880,13 @@ in the
.\" HREF
\fBpcre2unicode\fP
.\"
page.
page.
.P
If you know that your subject is valid, and you want to skip these checks for
performance reasons, you can set the PCRE2_NO_UTF_CHECK option when calling
\fBpcre2_match()\fP. You might want to do this for the second and subsequent
calls to \fBpcre2_match()\fP if you are making repeated calls to find all the
matches in a single subject string.
matches in a single subject string.
.P
NOTE: When PCRE2_NO_UTF_CHECK is set, the effect of passing an invalid string
as a subject, or an invalid value of \fIstartoffset\fP, is undefined. Your
@ -1921,10 +1921,10 @@ documentation.
.
.SH "NEWLINE HANDLING WHEN MATCHING"
.rs
.sp
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
either a
.sp
When PCRE2 is built, a default newline convention is set; this is usually the
standard convention for the operating system. The default can be overridden in
either a
.\" HTML <a href="#compilecontext">
.\" </a>
compile context
@ -1972,7 +1972,7 @@ valid newline sequence and explicit \er or \en escapes appear in the pattern.
.fi
.P
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by
addition, further substrings from the subject may be picked out by
parenthesized parts of the pattern. Following the usage in Jeffrey Friedl's
book, this is called "capturing" in what follows, and the phrase "capturing
subpattern" is used for a fragment of a pattern that picks out a substring.
@ -1982,14 +1982,14 @@ used to find out how many capturing subpatterns there are in a compiled
pattern.
.P
The overall matched string and any captured substrings are returned to the
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
caller via a vector of PCRE2_SIZE values, called the \fBovector\fP. This is
contained within the
.\" HTML <a href="#matchdatablock">
.\" </a>
match data block.
.\"
You can obtain direct access to the ovector by calling
\fBpcre2_get_ovector_pointer()\fP to find its address, and
You can obtain direct access to the ovector by calling
\fBpcre2_get_ovector_pointer()\fP to find its address, and
\fBpcre2_get_ovector_count()\fP to find the number of pairs of values it
contains. Alternatively, you can use the auxiliary functions for accessing
captured substrings
@ -2065,17 +2065,17 @@ had.
.B PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *\fImatch_data\fP);
.fi
.P
In addition to the offsets in the ovector, other information about a match is
In addition to the offsets in the ovector, other information about a match is
retained in the match data block and can be retrieved by the above functions.
.P
When a (*MARK) name is to be passed back, \fBpcre2_get_mark()\fP returns a
pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed
pointer to the zero-terminated name, which is within the compiled pattern.
Otherwise NULL is returned. A (*MARK) name may be available after a failed
match or a partial match, as well as after a successful one.
.P
The offset of the character at which the successful match started is
returned by \fBpcre2_get_startchar()\fP. This can be different to the value of
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
\fIovector[0]\fP if the pattern contains the \eK escape sequence. Note,
however, that \eK has no effect for a partial match.
.
.
@ -2083,7 +2083,7 @@ however, the \eK has no effect for a partial match.
.SS "Error return values from \fBpcre2_match()\fP"
.rs
.sp
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
error codes are also returned by other functions, and are documented with them.
The codes are given names in the header file. If UTF checking is in force and
@ -2237,7 +2237,7 @@ extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first
argument is a pointer to the match data block, the second is the group number,
and the third is a pointer to a variable into which the length is placed.
.P
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
The \fBpcre2_substring_copy_bynumber()\fP function copies one string into a
supplied buffer, whereas \fBpcre2_substring_get_bynumber()\fP copies it into
new memory, obtained using the same memory allocation function that was used
for the match data block. The first two arguments of these functions are a
@ -2250,10 +2250,10 @@ the buffer and a pointer to a variable that contains its length in code units.
This is updated to contain the actual number of code units used, excluding the
terminating zero.
.P
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by
For \fBpcre2_substring_get_bynumber()\fP the third and fourth arguments point
to variables that are updated with a pointer to the new memory and the number
of code units that comprise the substring, again excluding the terminating
zero. When the substring is no longer needed, the memory should be freed by
calling \fBpcre2_substring_free()\fP.
.P
The return value from these functions is zero for success, or one of these
@ -2266,9 +2266,9 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP.
.sp
PCRE2_ERROR_NOSUBSTRING
.sp
No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too
No substring with the given number was captured. This could be because there is
no capturing group of that number in the pattern, or because the group with
that number did not participate in the match, or because the ovector was too
small to capture that group.
.
.
@ -2284,7 +2284,7 @@ small to capture that group.
.P
The \fBpcre2_substring_list_get()\fP function extracts all available substrings
and builds a list of pointers to them, and a second list that contains their
lengths (in code units), excluding a terminating zero that is added to each of
lengths (in code units), excluding a terminating zero that is added to each of
them. All this is done in a single block of memory that is obtained using the
same memory allocation function that was used to get the match data block.
.P
@ -2295,7 +2295,7 @@ NULL pointer. The address of the list of lengths is returned via
therefore need the lengths, you may supply NULL as the \fBlengthsptr\fP
argument to disable the creation of a list of lengths. The yield of the
function is zero if all went well, or PCRE2_ERROR_NOMEMORY if the memory block
could not be obtained. When the list is no longer needed, it should be freed by
could not be obtained. When the list is no longer needed, it should be freed by
calling \fBpcre2_substring_list_free()\fP.
.P
If this function encounters a substring that is unset, which can happen when
@ -2340,7 +2340,7 @@ name.
.P
Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also
"byname" functions that correspond to the "bynumber" functions, the only
"byname" functions that correspond to the "bynumber" functions, the only
difference being that the second argument is a name instead of a number.
However, if PCRE2_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section).
@ -2413,7 +2413,7 @@ numbers, and hence the captured data.
.sp
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match at a given
want to find all possible matches, or the longest possible match at a given
position, consider using the alternative matching function (see below) instead.
If you cannot use the alternative function, you can kludge it up by making use
of the callout facility, which is described in the
@ -2614,8 +2614,8 @@ fail, this error is given.
.SH "SEE ALSO"
.rs
.sp
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
\fBpcre2build\fP(3), \fBpcre2libs\fP(3), \fBpcre2callout\fP(3),
\fBpcre2matching\fP(3), \fBpcre2partial\fP(3), \fBpcre2posix\fP(3),
\fBpcre2demo\fP(3), \fBpcre2sample\fP(3), \fBpcre2stack\fP(3).
.
.

View File

@ -71,11 +71,11 @@ single-byte characters, or UTF-8 strings. You can also build two other
libraries, called \fBlibpcre2-16\fP and \fBlibpcre2-32\fP, which process
strings that are contained in vectors of 16-bit and 32-bit code units,
respectively. These can be interpreted either as single-unit characters or
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
UTF-16/UTF-32 strings. To build these additional libraries, add one or both of
the following to the \fBconfigure\fP command:
.sp
--enable-pcre16
--enable-pcre32
--enable-pcre32
.sp
If you do not want the 8-bit library, add
.sp
@ -367,7 +367,7 @@ override this value by specifying a run-time option.
If you add one of
.sp
--enable-pcre2test-libreadline
--enable-pcre2test-libedit
--enable-pcre2test-libedit
.sp
to the \fBconfigure\fP command, \fBpcre2test\fP is linked with the
\fBlibreadline\fP or \fBlibedit\fP library, respectively, and when its input is
@ -384,8 +384,8 @@ unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
this:
.sp
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
"Readline uses the termcap functions, but does not link with
the termcap or curses library itself, allowing applications
which link with readline the to choose an appropriate library."
.sp
If your environment has not been set up so that an appropriate library is

View File

@ -16,9 +16,9 @@ PCRE2 provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE2 in the middle of pattern matching. The
caller of PCRE2 provides an external function by putting its entry point in
a match context (see \fBpcre2_set_callout()\fP in the
.\" HREF
\fBpcre2api\fP
.\"
.\" HREF
\fBpcre2api\fP
.\"
documentation).
.P
Within a regular expression, (?C) indicates the points at which the external

View File

@ -25,7 +25,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system
@ -56,8 +56,8 @@ the following line. */
/* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8
@ -141,7 +141,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&errornumber, /* for error number */
&erroroffset, /* for error offset */
@ -151,9 +151,9 @@ re = pcre2_compile(
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
printf("PCRE2 compilation failed at offset %d: %s\en", (int)erroroffset,
buffer);
return 1;
}
@ -197,7 +197,7 @@ if (rc < 0)
return 1;
}
/* Match succeeded. Get a pointer to the output vector, where string offsets are
/* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
@ -210,7 +210,7 @@ printf("\enMatch succeeded at offset %d\en", (int)ovector[0]);
* captured. *
*************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used
/* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */
if (rc == 0)
@ -261,7 +261,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
&name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two
and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */
tabptr = name_table;
@ -306,7 +306,7 @@ if (namecount <= 0) printf("No named substrings\en"); else
if (!find_all) /* Check for -g */
{
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */
}
@ -324,7 +324,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */

View File

@ -48,10 +48,10 @@ performance, there is also a "fast path" API that is JIT-specific.
.SH "SIMPLE USE OF JIT"
.rs
.sp
To make use of the JIT support in the simplest way, all you have to do is to
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
\fBpcre2_compile()\fP. This function has two arguments: the first is the
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
To make use of the JIT support in the simplest way, all you have to do is to
call \fBpcre2_jit_compile()\fP after successfully compiling a pattern with
\fBpcre2_compile()\fP. This function has two arguments: the first is the
compiled pattern pointer that was returned by \fBpcre2_compile()\fP, and the
second is a set of option bits, which must include at least one of
PCRE2_JIT_COMPLETE, PCRE2_JIT_PARTIAL_HARD, or PCRE2_JIT_PARTIAL_SOFT.
.P
@ -221,7 +221,7 @@ non-default JIT stacks might operate:
.sp
All the functions described in this section do nothing if JIT is not available,
and \fBpcre2_jit_stack_assign()\fP does nothing unless the \fBcode\fP argument
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
is non-NULL and points to a \fBpcre2_code\fP block that has been successfully
processed by \fBpcre2_jit_compile()\fP.
.
.
@ -302,18 +302,18 @@ callback.
.sp
int rc;
pcre2_code *re;
pcre2_match_data *match_data;
pcre2_match_data *match_data;
pcre2_jit_stack *jit_stack;
.sp
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0,
&errornumber, &erroffset, NULL);
/* Check for errors */
rc = pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
/* Check for errors */
/* Check for errors */
jit_stack = pcre2_jit_stack_alloc(NULL, 32*1024, 512*1024);
/* Check for error (NULL) */
pcre2_jit_stack_assign(re, NULL, jit_stack);
match_data = pcre2_match_data_create(10, NULL);
match_data = pcre2_match_data_create(10, NULL);
rc = pcre2_match(re, subject, length, 0, 0, match_data, NULL);
/* Check results */
pcre2_code_free(re);

View File

@ -64,15 +64,15 @@ matched; without such a restriction there would always be a partial match of an
empty string at the end of the subject.
.P
When a partial match is returned, the first two elements in the ovector point
to the portion of the subject that was matched. The appearance of \eK in the
to the portion of the subject that was matched. The appearance of \eK in the
pattern has no effect for a partial match. Consider this pattern:
.sp
/abc\eK123/
.sp
If it is matched against "456abc123xyz" the result is a complete match, and the
ovector defines the matched string as "123", because \eK resets the "start of
match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all
ovector defines the matched string as "123", because \eK resets the "start of
match" point. However, if a partial match is requested and the subject string
is "456abc12", a partial match is found for the string "abc12", because all
these characters are needed for a subsequent re-match with additional
characters.
.P
@ -316,14 +316,14 @@ same point as before.
For example, if the pattern "(?<=123)abc" is partially matched against the
string "xx123ab", the ovector offsets are 5 and 7 ("ab"). The maximum
lookbehind count is 3, so all characters before offset 2 can be discarded. The
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
displays a partial match, it indicates the lookbehind characters with '<'
value of \fBstartoffset\fP for the next match should be 3. When \fBpcre2test\fP
displays a partial match, it indicates the lookbehind characters with '<'
characters:
.sp
re> "(?<=123)abc"
data> xx123ab\e=ph
Partial match: 123ab
<<<
<<<
.P
3. Because a partial match must always contain at least one character, what
might be considered a partial match of an empty string actually gives a "no

View File

@ -118,7 +118,7 @@ page has
.\" </a>
further discussion
.\"
about newlines, and shows how to set the newline convention when calling
about newlines, and shows how to set the newline convention when calling
\fBpcre2_compile()\fP.
.P
It is also possible to specify a newline convention by starting a pattern
@ -196,7 +196,7 @@ corresponding characters in the subject. As a trivial example, the pattern
.sp
matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE2_CASELESS option), letters are matched
independently of case.
independently of case.
.P
The power of regular expressions comes from the ability to include alternatives
and repetitions in the pattern. These are encoded in the pattern by the use of
@ -1199,8 +1199,8 @@ An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default.
If a closing square bracket is required as a member of the class, it should be
the first data character in the class (after an initial circumflex, if present)
or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
or escaped with a backslash. This means that, by default, an empty class cannot
be defined. However, if the PCRE2_ALLOW_EMPTY_CLASS option is set, a closing
square bracket at the start does end the (empty) class.
.P
A character class matches a single character in the subject. A matched
@ -1221,7 +1221,7 @@ string.
When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would.
caseful version would.
.P
Characters that might indicate line breaks are never treated in any special way
when matching character classes, whatever line-ending sequence is in use, and
@ -1340,7 +1340,7 @@ classes by other sequences, as follows:
[:alnum:] becomes \ep{Xan}
[:alpha:] becomes \ep{L}
[:blank:] becomes \eh
[:cntrl:] becomes \ep{Cc}
[:cntrl:] becomes \ep{Cc}
[:digit:] becomes \ep{Nd}
[:lower:] becomes \ep{Ll}
[:space:] becomes \ep{Xps}
@ -1496,7 +1496,7 @@ match "cataract", "erpillar" or an empty string.
.sp
2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, the portion of the subject string that matched the
subpattern is passed back to the caller, separately from the portion that
subpattern is passed back to the caller, separately from the portion that
matched the whole pattern. (This applies only to the traditional matching
function; the DFA matching function does not support capturing.)
.P
@ -1916,7 +1916,7 @@ at release 5.10.
PCRE2 has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
This feature can be disabled by the PCRE2_NO_AUTOPOSSESS option, or starting
the pattern with (*NO_AUTO_POSSESS).
.P
When a pattern contains an unlimited repeat inside a subpattern that can itself
@ -2238,7 +2238,7 @@ if the pattern is written as
.sp
^.*+(?<=abcd)
.sp
there can be no backtracking for the .*+ item because of the possessive
there can be no backtracking for the .*+ item because of the possessive
quantifier; it can match only the entire string. The subsequent lookbehind
assertion does a single test on the last four characters. If it fails, the
match fails immediately. For long strings, this approach makes a significant
@ -2754,8 +2754,8 @@ same pair of parentheses when there is a repetition.
.P
PCRE2 provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE2 provides an external
function by putting its entry point in a match context using the function
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
function by putting its entry point in a match context using the function
\fBpcre2_set_callout()\fP and passing the context to \fBpcre2_match()\fP or
\fBpcre2_dfa_match()\fP. If no match context is passed, or if the callout entry
point is set to NULL, callouts are disabled.
.P
@ -3008,7 +3008,7 @@ output from \fBpcre2test\fP:
re> /(*COMMIT)abc/
data> xyzabc
0: abc
data>
data>
re> /(*COMMIT)abc/no_start_optimize
data> xyzabc
No match
@ -3035,7 +3035,7 @@ as (*COMMIT).
.P
The behaviour of (*PRUNE:NAME) is not the same as (*MARK:NAME)(*PRUNE).
It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) or (*THEN).
.sp
(*SKIP)
@ -3085,7 +3085,7 @@ group. If (*THEN) is not inside an alternation, it acts like (*PRUNE).
.P
The behaviour of (*THEN:NAME) is not the same as (*MARK:NAME)(*THEN).
It is like (*MARK:NAME) in that the name is remembered for passing back to the
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
caller. However, (*SKIP:NAME) searches only for names set with (*MARK),
ignoring those set by (*PRUNE) and (*THEN).
.P
A subpattern that does not contain a | character is just a part of the

View File

@ -90,10 +90,10 @@ of how to do this are given in the
\fBpcre2build\fP
.\"
documentation. When built in this way, instead of using the stack, PCRE2
gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
to supply your own memory management function. For details, see the section
entitled
gets memory for remembering backup points from the heap. By default, the memory
is obtained by calling the system \fBmalloc()\fP function, but you can arrange
to supply your own memory management function. For details, see the section
entitled
.\" HTML <a href="pcre2api.html#matchcontext">
.\" </a>
"The match context"
@ -104,8 +104,8 @@ in the
.\"
documentation. Since the block sizes are always the same, it may be possible to
implement a customized memory handler that is more efficient than the standard
function. The memory blocks obtained for this purpose are retained and re-used
if possible while \fBpcre2_match()\fP is running. They are all freed just
function. The memory blocks obtained for this purpose are retained and re-used
if possible while \fBpcre2_match()\fP is running. They are all freed just
before it exits.
.
.

View File

@ -387,7 +387,7 @@ appear.
(*LIMIT_MATCH=d) set the match limit to d (decimal number)
(*LIMIT_RECURSION=d) set the recursion limit to d (decimal number)
(*NOTEMPTY) set PCRE2_NOTEMPTY when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NOTEMPTY_ATSTART) set PCRE2_NOTEMPTY_ATSTART when matching
(*NO_AUTO_POSSESS) no auto-possessification (PCRE2_NO_AUTO_POSSESS)
(*NO_START_OPT) no start-match optimization (PCRE2_NO_START_OPTIMIZE)
(*UTF) set appropriate UTF mode for the library in use

View File

@ -433,7 +433,7 @@ about the pattern:
/I info show info about compiled pattern
hex pattern is coded in hexadecimal
jit[=<number>] use JIT
jitverify verify JIT use
jitverify verify JIT use
locale=<name> use this locale
memory show memory used
newline=<type> set newline type
@ -518,7 +518,7 @@ number in the range 0 to 7:
7 all three modes
.sp
If no number is given, 7 is assumed. If JIT compilation is successful, the
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
compiled JIT code will automatically be used when \fBpcre2_match()\fP is run
for the appropriate type of match, except when incompatible run-time options
are specified. For more details, see the
.\" HREF
@ -670,7 +670,7 @@ for a description of their effects.
partial_hard (or ph) set PCRE2_PARTIAL_HARD
partial_soft (or ps) set PCRE2_PARTIAL_SOFT
.sp
The partial matching modifiers are provided with abbreviations because they
The partial matching modifiers are provided with abbreviations because they
appear frequently in tests.
.P
If the \fB/posix\fP modifier was present on the pattern, causing the POSIX
@ -844,8 +844,8 @@ context via \fBpcre2_set_match_limit()\fP and \fBpcre2_set_recursion_limit()\fP
until it finds the minimum values for each parameter that allow
\fBpcre2_match()\fP to complete without error.
.P
If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a
If JIT is being used, only the match limit is relevant. If DFA matching is
being used, neither limit is relevant, and this modifier is ignored (with a
warning message).
.P
The \fImatch_limit\fP number is a measure of the amount of backtracking
@ -890,10 +890,10 @@ appears, though of course it can also be used to set a default in a
\fB#subject\fP command. It specifies the number of pairs of offsets that are
available for storing matching information. The default is 15.
.P
At least one pair of offsets is always created by
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
At least one pair of offsets is always created by
\fBpcre2_match_data_create()\fP, for matching with PCRE2's native API, so a
value of 0 is the same as 1. However a value of 0 is useful when testing the
POSIX API because it causes \fBregexec()\fP to be called with a NULL capture
vector.
.
.

View File

@ -57,7 +57,7 @@ individual code units.
In UTF modes, the dot metacharacter matches one UTF character instead of a
single code unit.
.P
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
The escape sequence \eC can be used to match a single code unit, in a UTF mode,
but its use can lead to some strange effects because it breaks up multi-unit
characters (see the description of \eC in the
.\" HREF
@ -107,8 +107,8 @@ case-equivalent, and these are treated as such.
.rs
.sp
When the PCRE2_UTF option is set, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given.
are (by default) checked for validity on entry to the relevant functions.
If an invalid UTF string is passed, an error return is given.
.P
UTF-16 and UTF-32 strings can indicate their endianness by a special code known
as a byte-order mark (BOM). The PCRE2 functions do not handle this, expecting

View File

@ -82,13 +82,13 @@ for (;;)
chomp($pattern);
$pattern =~ s/\s+$//;
# Split the pattern from the modifiers and adjust them as necessary.
$pattern =~ /^\s*((.).*\2)(.*)$/s;
$pat = $1;
$mod = $3;
# The private "aftertext" modifier means "print $' afterwards".
$showrest = ($mod =~ s/aftertext,?//);
@ -131,9 +131,9 @@ for (;;)
for (;;)
{
last if ! ($_ = <$infile>);
last if $_ =~ /^\s*$/;
}
}
last if $_ =~ /^\s*$/;
}
}
next NEXT_RE;
}

View File

@ -41,7 +41,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* This is a freestanding support program to generate a file containing
character tables for PCRE2. The tables are built according to the current
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
locale using the pcre2_maketables() function, which is part of the PCRE2 API.
*/
#ifdef HAVE_CONFIG_H

View File

@ -38,7 +38,7 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* This module contains functions that scan a compiled pattern and change
/* This module contains functions that scan a compiled pattern and change
repeats into possessive repeats where possible. */
@ -359,8 +359,8 @@ Returns: points to the start of the next opcode if *code is accepted
NULL if *code is not accepted
*/
static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
static PCRE2_SPTR
get_chr_property_list(PCRE2_SPTR code, BOOL utf, const uint8_t *fcc,
uint32_t *list)
{
PCRE2_UCHAR c = *code;
@ -387,7 +387,7 @@ if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
if (c == OP_UPTO || c == OP_MINUPTO || c == OP_EXACT || c == OP_POSUPTO)
code += IMM2_SIZE;
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
list[1] = (c != OP_PLUS && c != OP_MINPLUS && c != OP_EXACT &&
c != OP_POSPLUS);
switch(base)
@ -595,7 +595,7 @@ for(;;)
Therefore infinite recursions are not possible. */
c = *code;
/* Skip over callouts */
if (c == OP_CALLOUT)
@ -624,7 +624,7 @@ for(;;)
/* If the bracket is capturing, and referenced by an OP_RECURSE, or
it is an atomic sub-pattern (assert, once, etc.) the non-greedy case
cannot be converted to a possessive form. */
if (base_list[1] == 0) return FALSE;
switch(*(code - GET(code, 1)))
@ -636,7 +636,7 @@ for(;;)
case OP_ONCE:
case OP_ONCE_NC:
/* Atomic sub-patterns and assertions can always auto-possessify their
last iterator. However, if the group was entered as a result of checking
last iterator. However, if the group was entered as a result of checking
a previous iterator, this is not possible. */
return !entered_a_group;
@ -672,7 +672,7 @@ for(;;)
do next_code += GET(next_code, 1); while (*next_code == OP_ALT);
/* The bracket content will be checked by the OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
if (!compare_opcodes(next_code, utf, cb, base_list, base_end))
return FALSE;
@ -681,14 +681,14 @@ for(;;)
continue;
default:
break;
break;
}
/* Check for a supported opcode, and load its properties. */
code = get_chr_property_list(code, utf, cb->fcc, list);
if (code == NULL) return FALSE; /* Unsupported */
/* If either opcode is a small character list, set pointers for comparing
characters from that list with another list, or with a property. */
@ -778,7 +778,7 @@ for(;;)
/* Because the bit sets are unaligned bytes, we need to perform byte
comparison here. */
set_end = set1 + 32;
if (invert_bits)
{
@ -922,7 +922,7 @@ for(;;)
autoposstab[leftop - FIRST_AUTOTAB_OP][rightop - FIRST_AUTOTAB_OP];
if (!accepted) return FALSE;
if (list[1] == 0) return TRUE;
/* Might be an empty repeat. */
continue;
@ -1093,8 +1093,8 @@ but some compilers complain about an unreachable statement. */
if appropriate. This function modifies the compiled opcode!
Arguments:
code points to start of the byte code
utf TRUE in UTF mode
code points to start of the byte code
utf TRUE in UTF mode
cb compile data block
Returns: nothing
@ -1111,7 +1111,7 @@ uint32_t list[8];
for (;;)
{
c = *code;
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{
c -= get_repeat_base(c) - OP_STAR;
@ -1244,7 +1244,7 @@ for (;;)
}
/* Add in the fixed length from the table */
code += PRIV(OP_lengths)[c];
/* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be

View File

@ -594,7 +594,7 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_UTF_RIGHTPAR, 4, PSO_OPT, PCRE2_UTF },
{ (uint8_t *)STRING_UCP_RIGHTPAR, 4, PSO_OPT, PCRE2_UCP },
{ (uint8_t *)STRING_NOTEMPTY_RIGHTPAR, 9, PSO_FLG, PCRE2_NOTEMPTY_SET },
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
{ (uint8_t *)STRING_NOTEMPTY_ATSTART_RIGHTPAR,17, PSO_FLG, PCRE2_NE_ATST_SET },
{ (uint8_t *)STRING_NO_AUTO_POSSESS_RIGHTPAR, 16, PSO_OPT, PCRE2_NO_AUTO_POSSESS },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
@ -675,12 +675,12 @@ static const uint8_t opcode_possessify[] = {
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_code_free(pcre2_code *code)
{
if (code != NULL)
if (code != NULL)
{
if (code->executable_jit != NULL)
PRIV(jit_free)(code->executable_jit, &code->memctl);
code->memctl.free(code, code->memctl.memory_data);
}
}
}
@ -4462,7 +4462,7 @@ for (;; ptr++)
syntax, so we just ignore the repeat. */
if (*previous == OP_COND && previous[LINK_SIZE+1] == OP_FALSE &&
previous[GET(previous, 1)] != OP_ALT)
previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT;
/* There is no sense in actually repeating assertions. The only potential
@ -5169,64 +5169,64 @@ for (;; ptr++)
namelen = -1; /* => not a name; must set to avoid warning */
name = NULL; /* Always set to avoid warning */
recno = 0; /* Always set to avoid warning */
/* Point at character after (?( */
ptr++;
/* Check for (?(VERSION[>]=n.m), which is a facility whereby indirect
users of PCRE2 via an application can discover which release of PCRE2
users of PCRE2 via an application can discover which release of PCRE2
is being used. */
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
if (PRIV(strncmp_c8)(ptr, STRING_VERSION, 7) == 0 &&
ptr[7] != CHAR_RIGHT_PARENTHESIS)
{
BOOL ge = FALSE;
BOOL ge = FALSE;
int major = 0;
int minor = 0;
ptr += 7;
if (*ptr == CHAR_GREATER_THAN_SIGN)
{
ge = TRUE;
ptr++;
}
}
/* NOTE: cannot write IS_DIGIT(*(++ptr)) here because IS_DIGIT
references its argument twice. */
if (*ptr != CHAR_EQUALS_SIGN || (ptr++, !IS_DIGIT(*ptr)))
{
{
*errorcodeptr = ERR79;
goto FAILED;
}
goto FAILED;
}
while (IS_DIGIT(*ptr)) major = major * 10 + *ptr++ - '0';
if (*ptr == CHAR_DOT)
{
ptr++;
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
}
ptr++;
while (IS_DIGIT(*ptr)) minor = minor * 10 + *ptr++ - '0';
}
if (*ptr != CHAR_RIGHT_PARENTHESIS)
{
*errorcodeptr = ERR79;
goto FAILED;
}
}
if (ge)
code[1+LINK_SIZE] = ((PCRE2_MAJOR > major) ||
(PCRE2_MAJOR == major && PCRE2_MINOR >= minor))?
OP_TRUE : OP_FALSE;
else
else
code[1+LINK_SIZE] = (PCRE2_MAJOR == major && PCRE2_MINOR == minor)?
OP_TRUE : OP_FALSE;
ptr++;
skipbytes = 1;
break; /* End of condition processing */
}
}
/* Check for a test for recursion in a named group. */
if (*ptr == CHAR_R && ptr[1] == CHAR_AMPERSAND)
@ -5404,8 +5404,8 @@ for (;; ptr++)
}
/* Similarly, check for the (?(DEFINE) "condition", which is always
false. During compilation we set OP_DEFINE to distinguish this from
other OP_FALSE conditions so that it can be checked for having only one
false. During compilation we set OP_DEFINE to distinguish this from
other OP_FALSE conditions so that it can be checked for having only one
branch, but after that the opcode is changed to OP_FALSE. */
else if (namelen == 6 && PRIV(strncmp_c8)(name, STRING_DEFINE, 6) == 0)
@ -6133,7 +6133,7 @@ for (;; ptr++)
while (*tc != OP_KET);
/* A DEFINE group is never obeyed inline (the "condition" is always
false). It must have only one branch. Having checked this, change the
false). It must have only one branch. Having checked this, change the
opcode to OP_FALSE. */
if (code[LINK_SIZE+1] == OP_DEFINE)
@ -6143,7 +6143,7 @@ for (;; ptr++)
*errorcodeptr = ERR54;
goto FAILED;
}
code[LINK_SIZE+1] = OP_FALSE;
code[LINK_SIZE+1] = OP_FALSE;
bravalue = OP_DEFINE; /* Just a flag to suppress char handling below */
}
@ -6219,7 +6219,7 @@ for (;; ptr++)
than one can replicate it as reqcu if necessary. If the subpattern has
no firstcu, set "none" for the whole branch. In both cases, a zero
repeat forces firstcu to "none". */
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
{
if (subfirstcuflags >= 0)
@ -6759,7 +6759,7 @@ for (;;)
reqcu = firstcu;
reqcuflags = firstcuflags;
}
}
}
firstcuflags = REQ_NONE;
}
@ -7389,12 +7389,12 @@ if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
/* A NULL compile context means "use a default context" */
if (ccontext == NULL)
if (ccontext == NULL)
ccontext = (pcre2_compile_context *)(&PRIV(default_compile_context));
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
to ensure that it is always possible to look one code unit beyond the end of
/* A zero-terminated pattern is indicated by the special length value
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
to ensure that it is always possible to look one code unit beyond the end of
the pattern's characters. */
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
@ -7481,19 +7481,19 @@ while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
case PSO_OPT:
cb.external_options |= p->value;
break;
case PSO_FLG:
setflags |= p->value;
break;
break;
case PSO_NL:
newline = p->value;
setflags |= PCRE2_NL_SET;
setflags |= PCRE2_NL_SET;
break;
case PSO_BSR:
bsr = p->value;
setflags |= PCRE2_BSR_SET;
setflags |= PCRE2_BSR_SET;
break;
case PSO_LIMM:
@ -7883,8 +7883,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
/* If the pattern is still not anchored and we do not have a first code unit,
see if there is one that is asserted (these are not saved during the compile
because they can cause conflicts with actual literals that follow). This code
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
because they can cause conflicts with actual literals that follow). This code
need not be obeyed if PCRE2_NO_START_OPTIMIZE is set, as the data it would
create will not be used. */
if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
@ -7930,7 +7930,7 @@ if ((re->overall_options & (PCRE2_ANCHORED|PCRE2_NO_START_OPTIMIZE)) == 0)
}
/* Handle the "required code unit", if one is set. In the case of an anchored
pattern, do this only if it follows a variable length item in the pattern.
pattern, do this only if it follows a variable length item in the pattern.
Again, skip this if PCRE2_NO_START_OPTIMIZE is set. */
if (reqcuflags >= 0 &&
@ -7973,7 +7973,7 @@ while (*codestart == OP_ALT);
to set up information such as a bitmap of starting code units and a minimum
matching length. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
PRIV(study)(re) != 0)
{
errorcode = ERR31;

View File

@ -42,7 +42,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
/* Save the configured link size, which is in bytes. In 16-bit and 32-bit modes
its value gets changed by pcre2_internal.h to be in code units. */
static int configured_link_size = LINK_SIZE;
@ -69,7 +69,7 @@ Arguments:
Returns: 0 if data returned
>= 0 if where is NULL, giving length required
PCRE2_ERROR_BADOPTION if "where" not recognized
or JIT target requested when JIT not enabled
or JIT target requested when JIT not enabled
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -80,33 +80,33 @@ if (where == NULL) /* Requests a length */
switch(what)
{
default:
return PCRE2_ERROR_BADOPTION;
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_NEWLINE:
case PCRE2_CONFIG_STACKRECURSE:
case PCRE2_CONFIG_UNICODE:
return sizeof(int);
return sizeof(int);
case PCRE2_CONFIG_MATCHLIMIT:
case PCRE2_CONFIG_PARENSLIMIT:
case PCRE2_CONFIG_RECURSIONLIMIT:
return sizeof(long int);
/* These are handled below */
case PCRE2_CONFIG_JITTARGET:
case PCRE2_CONFIG_UNICODE_VERSION:
case PCRE2_CONFIG_VERSION:
break;
}
}
}
switch (what)
{
default:
default:
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
@ -129,9 +129,9 @@ switch (what)
#ifdef SUPPORT_JIT
{
const char *v = PRIV(jit_get_target)();
return (where == NULL)? (int)strlen(v) :
return (where == NULL)? (int)strlen(v) :
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
}
}
#else
return PCRE2_ERROR_BADOPTION;
#endif
@ -163,9 +163,9 @@ switch (what)
*((int *)where) = 1;
#endif
break;
case PCRE2_CONFIG_UNICODE_VERSION:
{
{
#if defined SUPPORT_UNICODE
const char *v = PRIV(unicode_version);
#else
@ -183,15 +183,15 @@ switch (what)
*((int *)where) = 0;
#endif
break;
/* The hackery in setting "v" below is to cope with the case when
/* The hackery in setting "v" below is to cope with the case when
PCRE2_PRERELEASE is set to an empty string (which it is for real releases).
If the second alternative is used in this case, it does not leave a space
If the second alternative is used in this case, it does not leave a space
before the date. On the other hand, if all four macros are put into a single
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
XSTRING when PCRE2_PRERELEASE is not empty, an unwanted space is inserted.
There are problems using an "obvious" approach like this:
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
XSTRING(PCRE2_MAJOR) "." XSTRING(PCRE_MINOR)
XSTRING(PCRE2_PRERELEASE) " " XSTRING(PCRE_DATE)
because, when PCRE2_PRERELEASE is empty, this leads to an attempted expansion
@ -199,18 +199,18 @@ switch (what)
argument consists of no preprocessing tokens, the behavior is undefined." It
turns out that gcc treats this case as a single empty string - which is what
we really want - but Visual C grumbles about the lack of an argument for the
macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to
macro. Unfortunately, both are within their rights. As there seems to be no
way to test for a macro's value being empty at compile time, we have to
resort to a runtime test. */
case PCRE2_CONFIG_VERSION:
{
{
const char *v = (XSTRING(Z PCRE2_PRERELEASE)[1] == 0)?
XSTRING(PCRE2_MAJOR.PCRE2_MINOR PCRE2_DATE) :
XSTRING(PCRE2_MAJOR.PCRE2_MINOR) XSTRING(PCRE2_PRERELEASE PCRE2_DATE);
return (where == NULL)? (int)strlen(v) :
return (where == NULL)? (int)strlen(v) :
PRIV(strcpy_c8)((PCRE2_UCHAR *)where, v);
}
}
}
return 0;

View File

@ -72,15 +72,15 @@ free(block);
* Get a block and save memory control *
*************************************************/
/* This internal function is called to get a block of memory in which the
/* This internal function is called to get a block of memory in which the
memory control data is to be stored at the start for future use.
Arguments:
size amount of memory required
memctl pointer to a memctl block or NULL
Returns: pointer to memory or NULL on failure
*/
*/
PCRE2_EXP_DEFN void *
PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
@ -88,7 +88,7 @@ PRIV(memctl_malloc)(size_t size, pcre2_memctl *memctl)
pcre2_memctl *newmemctl;
void *yield = (memctl == NULL)? malloc(size) :
memctl->malloc(size, memctl->memory_data);
if (yield == NULL) return NULL;
if (yield == NULL) return NULL;
newmemctl = (pcre2_memctl *)yield;
if (memctl == NULL)
{
@ -96,9 +96,9 @@ if (memctl == NULL)
newmemctl->free = default_free;
newmemctl->memory_data = NULL;
}
else *newmemctl = *memctl;
else *newmemctl = *memctl;
return yield;
}
}
@ -108,11 +108,11 @@ return yield;
/* Initializing for compile and match contexts is done in separate, private
functions so that these can be called from functions such as pcre2_compile()
when an external context is not supplied. The initializing functions have an
when an external context is not supplied. The initializing functions have an
option to set up default memory management. */
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
pcre2_general_context_create(void *(*private_malloc)(size_t, void *),
void (*private_free)(void *, void *), void *memory_data)
{
pcre2_general_context *gcontext;
@ -121,7 +121,7 @@ if (private_free == NULL) private_free = default_free;
gcontext = private_malloc(sizeof(pcre2_real_general_context), memory_data);
if (gcontext == NULL) return NULL;
gcontext->memctl.malloc = private_malloc;
gcontext->memctl.free = private_free;
gcontext->memctl.free = private_free;
gcontext->memctl.memory_data = memory_data;
return gcontext;
}
@ -136,7 +136,7 @@ const pcre2_compile_context PRIV(default_compile_context) = {
PRIV(default_tables),
BSR_DEFAULT,
NEWLINE_DEFAULT,
PARENS_NEST_LIMIT };
PARENS_NEST_LIMIT };
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
@ -145,8 +145,8 @@ PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_create(pcre2_general_context *gcontext)
{
pcre2_compile_context *ccontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
sizeof(pcre2_real_compile_context), (pcre2_memctl *)gcontext);
if (ccontext == NULL) return NULL;
*ccontext = PRIV(default_compile_context);
if (gcontext != NULL)
*((pcre2_memctl *)ccontext) = *((pcre2_memctl *)gcontext);
@ -159,14 +159,14 @@ when no context is supplied to a match function. */
const pcre2_match_context PRIV(default_match_context) = {
{ default_malloc, default_free, NULL },
#ifdef HEAP_MATCH_RECURSE
#ifdef HEAP_MATCH_RECURSE
{ default_malloc, default_free, NULL },
#endif
NULL,
NULL,
MATCH_LIMIT,
MATCH_LIMIT_RECURSION };
MATCH_LIMIT_RECURSION };
/* The create function copies the default into the new memory, but must
override the default memory handling functions if a gcontext was provided. */
@ -174,8 +174,8 @@ PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_create(pcre2_general_context *gcontext)
{
pcre2_match_context *mcontext = PRIV(memctl_malloc)(
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
if (mcontext == NULL) return NULL;
sizeof(pcre2_real_match_context), (pcre2_memctl *)gcontext);
if (mcontext == NULL) return NULL;
*mcontext = PRIV(default_match_context);
if (gcontext != NULL)
*((pcre2_memctl *)mcontext) = *((pcre2_memctl *)gcontext);
@ -190,8 +190,8 @@ return mcontext;
PCRE2_EXP_DEFN pcre2_general_context * PCRE2_CALL_CONVENTION
pcre2_general_context_copy(pcre2_general_context *gcontext)
{
pcre2_general_context *new =
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
pcre2_general_context *new =
gcontext->memctl.malloc(sizeof(pcre2_real_general_context),
gcontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, gcontext, sizeof(pcre2_real_general_context));
@ -202,8 +202,8 @@ return new;
PCRE2_EXP_DEFN pcre2_compile_context * PCRE2_CALL_CONVENTION
pcre2_compile_context_copy(pcre2_compile_context *ccontext)
{
pcre2_compile_context *new =
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
pcre2_compile_context *new =
ccontext->memctl.malloc(sizeof(pcre2_real_compile_context),
ccontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, ccontext, sizeof(pcre2_real_compile_context));
@ -214,8 +214,8 @@ return new;
PCRE2_EXP_DEFN pcre2_match_context * PCRE2_CALL_CONVENTION
pcre2_match_context_copy(pcre2_match_context *mcontext)
{
pcre2_match_context *new =
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
pcre2_match_context *new =
mcontext->memctl.malloc(sizeof(pcre2_real_match_context),
mcontext->memctl.memory_data);
if (new == NULL) return NULL;
memcpy(new, mcontext, sizeof(pcre2_real_match_context));
@ -267,14 +267,14 @@ data. */
/* ------------ Compile contexts ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_character_tables(pcre2_compile_context *ccontext,
pcre2_set_character_tables(pcre2_compile_context *ccontext,
const unsigned char *tables)
{
ccontext->tables = tables;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_bsr(pcre2_compile_context *ccontext, uint32_t value)
{
switch(value)
@ -283,13 +283,13 @@ switch(value)
case PCRE2_BSR_UNICODE:
ccontext->bsr_convention = value;
return 0;
default:
return PCRE2_ERROR_BADDATA;
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
{
switch(newline)
@ -301,10 +301,10 @@ switch(newline)
case PCRE2_NEWLINE_ANYCRLF:
ccontext->newline_convention = newline;
return 0;
default:
return PCRE2_ERROR_BADDATA;
}
default:
return PCRE2_ERROR_BADDATA;
}
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
@ -315,7 +315,7 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
int (*guard)(uint32_t))
{
ccontext->stack_guard = guard;
@ -325,8 +325,8 @@ return 0;
/* ------------ Match contexts ------------ */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_callout(pcre2_match_context *mcontext,
int (*callout)(pcre2_callout_block *), void *callout_data)
{
mcontext->callout = callout;
@ -349,8 +349,8 @@ return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
pcre2_set_recursion_memory_management(pcre2_match_context *mcontext,
void *(*mymalloc)(size_t, void *), void (*myfree)(void *, void *),
void *mydata)
{
#ifdef HEAP_MATCH_RECURSE
@ -364,6 +364,6 @@ mcontext->stack_memctl.memory_data = mydata;
(void)mydata;
#endif
return 0;
}
}
/* End of pcre2_context.c */

View File

@ -376,7 +376,7 @@ stateblock *next_active_state, *next_new_state;
const uint8_t *ctypes, *lcc, *fcc;
PCRE2_SPTR ptr;
PCRE2_SPTR end_code;
PCRE2_SPTR end_code;
PCRE2_SPTR first_op;
dfa_recursion_info new_recursive;
@ -542,8 +542,8 @@ for (;;)
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
if (ptr > mb->last_used_ptr) mb->last_used_ptr = ptr;
/* Make the new state list into the active state list and empty the
new state list. */
@ -633,7 +633,7 @@ for (;;)
/* If this opcode inspects a character, but we are at the end of the
subject, remember the fact for use when testing for a partial match. */
if (clen == 0 && poptable[codevalue] != 0)
could_continue = TRUE;
@ -975,7 +975,7 @@ for (;;)
if (utf) { FORWARDCHARTEST(temp, mb->end_subject); }
#endif
mb->last_used_ptr = temp;
}
}
#ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0)
{
@ -2643,7 +2643,7 @@ for (;;)
if (condcode == OP_FALSE)
{ ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
/* There is also an always-true condition */
if (condcode == OP_TRUE)
@ -2999,7 +2999,7 @@ for (;;)
The "could_continue" variable is true if a state could have continued but
for the fact that the end of the subject was reached. */
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
@ -3098,7 +3098,7 @@ if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
/* Plausibility checks */
if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
@ -3127,19 +3127,19 @@ with different endianness. */
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flag" function which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
@ -3168,7 +3168,7 @@ end_subject = subject + length;
req_cu_ptr = start_match - 1;
anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 ||
(re->overall_options & PCRE2_ANCHORED) != 0;
/* The "must be at the start of a line" flags are used in a loop when finding
where to start. */
@ -3307,7 +3307,7 @@ for (;;)
/* There are some optimizations that avoid running the match if a known
starting point is not found, or if a known later code unit is not present.
However, there is an option (settable at compile time) that disables
these, for testing and for ensuring that all callouts do actually occur.
these, for testing and for ensuring that all callouts do actually occur.
The optimizations must also be avoided when restarting a DFA match. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
@ -3493,7 +3493,7 @@ for (;;)
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
if (rc != PCRE2_ERROR_NOMATCH || anchored)
{
if (rc == PCRE2_ERROR_PARTIAL && match_data->oveccount > 0)
@ -3504,7 +3504,7 @@ for (;;)
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = mb->last_used_ptr - subject;
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
match_data->rc = rc;
return rc;
}

View File

@ -48,7 +48,7 @@ POSSIBILITY OF SUCH DAMAGE.
#define STRING(a) # a
#define XSTRING(s) STRING(s)
/* The texts of compile-time error messages. Compile-time error numbers start
/* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100).
Do not ever re-use any error number, because they are documented. Always add a
@ -101,7 +101,7 @@ static const char compile_error_texts[] =
"(?R or (?[+-]digits must be followed by )\0"
/* 30 */
"unknown POSIX class name\0"
"internal error in pcre2_study(): should not occur\0"
"internal error in pcre2_study(): should not occur\0"
"this version of PCRE does not have UTF or Unicode property support\0"
"parentheses are too deeply nested (stack check)\0"
"character code point value in \\x{} or \\o{} is too large\0"
@ -158,94 +158,94 @@ static const char compile_error_texts[] =
"name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
"character code point value in \\u.... sequence is too large\0"
"digits missing in \\x{} or \\o{}\0"
"syntax error in (?(VERSION condition\0"
"syntax error in (?(VERSION condition\0"
;
/* Match-time and UTF error texts are in the same format. */
static const char match_error_texts[] =
"no error\0"
"no match\0"
"no match\0"
"partial match\0"
"UTF-8 error: 1 byte missing at end\0"
"UTF-8 error: 2 bytes missing at end\0"
/* 5 */
/* 5 */
"UTF-8 error: 3 bytes missing at end\0"
"UTF-8 error: 4 bytes missing at end\0"
"UTF-8 error: 5 bytes missing at end\0"
"UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 2 top bits not 0x80\0"
"UTF-8 error: byte 3 top bits not 0x80\0"
/* 10 */
"UTF-8 error: byte 4 top bits not 0x80\0"
"UTF-8 error: byte 5 top bits not 0x80\0"
"UTF-8 error: byte 6 top bits not 0x80\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
"UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
/* 15 */
/* 15 */
"UTF-8 error: code points greater than 0x10ffff are not defined\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: code points 0xd800-0xdfff are not defined\0"
"UTF-8 error: overlong 2-byte sequence\0"
"UTF-8 error: overlong 3-byte sequence\0"
"UTF-8 error: overlong 4-byte sequence\0"
/* 20 */
/* 20 */
"UTF-8 error: overlong 5-byte sequence\0"
"UTF-8 error: overlong 6-byte sequence\0"
"UTF-8 error: isolated 0x80 byte\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0"
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-8 error: illegal byte (0xfe or 0xff)\0"
"UTF-16 error: missing low surrogate at end\0"
/* 25 */
"UTF-16 error: invalid low surrogate\0"
"UTF-16 error: isolated low surrogate\0"
"UTF-32 error: code points 0xd800-0xdfff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"UTF-32 error: code points greater than 0x10ffff are not defined\0"
"bad data value\0"
/* 30 */
/* 30 */
"bad length\0"
"magic number missing\0"
"pattern compiled in wrong mode: 8/16/32-bit error\0"
"bad offset value\0"
"bad option value\0"
/* 35 */
/* 35 */
"bad offset into UTF string\0"
"callout error code\0" /* Never returned by PCRE2 itself */
"callout error code\0" /* Never returned by PCRE2 itself */
"invalid data in workspace for DFA restart\0"
"too much recursion for DFA matching\0"
"backreference condition or recursion test not supported for DFA matching\0"
/* 40 */
/* 40 */
"item unsupported for DFA matching\0"
"workspace size exceeded in DFA matching\0"
"internal error - pattern overwritten?\0"
"bad JIT option\0"
"JIT stack limit reached\0"
/* 45 */
/* 45 */
"match limit exceeded\0"
"no more memory\0"
"unknown or unset substring\0"
"unknown or unset substring\0"
"NULL argument passed\0"
"nested recursion at the same subject position\0"
/* 50 */
/* 50 */
"recursion limit exceeded\0"
"requested value is not set\0"
;
"requested value is not set\0"
;
/*************************************************
* Return error message *
*************************************************/
/* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all
/* This function copies an error message into a buffer whose units are of an
appropriate width. Error numbers are positive for compile-time errors, and
negative for match-time errors (except for UTF errors), but the numbers are all
distinct.
Arguments:
enumber error number
buffer where to put the message (zero terminated)
size size of the buffer
Returns: length of message if all is well
negative on error
*/
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
@ -260,23 +260,23 @@ if (size == 0) return PCRE2_ERROR_NOMEMORY;
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
{
message = compile_error_texts;
n = enumber - COMPILE_ERROR_BASE;
}
n = enumber - COMPILE_ERROR_BASE;
}
else /* Match or UTF error */
{
message = match_error_texts;
n = -enumber;
}
n = -enumber;
}
for (; n > 0; n--)
{
while (*message++ != CHAR_NULL) {};
if (*message == CHAR_NULL)
{
if (*message == CHAR_NULL)
{
sprintf(xbuff, "Internal error: no text for error %d", enumber);
break;
break;
}
}
}
for (i = 0; *message != 0; i++)
{
@ -287,9 +287,9 @@ for (i = 0; *message != 0; i++)
}
buffer[i] = *message++;
}
buffer[i] = 0;
return i;
return i;
}
/* End of pcre2_error.c */

View File

@ -1553,11 +1553,11 @@ enum {
/* This is used to skip a subpattern with a {0} quantifier */
OP_SKIPZERO, /* 162 */
/* This is used to identify a DEFINE group during compilation so that it can
be checked for having only one branch. It is changed to OP_FALSE before
be checked for having only one branch. It is changed to OP_FALSE before
compilation finishes. */
OP_DEFINE, /* 163 */
/* This is not an opcode, but is used to check that tables indexed by opcode
@ -1565,7 +1565,7 @@ enum {
some in the past. */
OP_TABLE_LENGTH
};
/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
@ -1708,7 +1708,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
1, 3, /* THEN, THEN_ARG */ \
1, 1, 1, 1, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */ \
1+IMM2_SIZE, 1, /* CLOSE, SKIPZERO */ \
1 /* DEFINE */
1 /* DEFINE */
/* A magic value for OP_RREF to indicate the "any recursion" condition. */
@ -1830,9 +1830,9 @@ extern const uint8_t PRIV(ucd_stage1)[];
extern const uint16_t PRIV(ucd_stage2)[];
extern const uint32_t PRIV(ucp_gbtable)[];
extern const uint32_t PRIV(ucp_gentype)[];
#ifdef SUPPORT_JIT
#ifdef SUPPORT_JIT
extern const int PRIV(ucp_typerange)[];
#endif
#endif
extern const char *PRIV(unicode_version);
extern const ucp_type_table PRIV(utt)[];
extern const char PRIV(utt_names)[];

View File

@ -39,16 +39,16 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains mode-dependent macro and structure definitions. The
/* This module contains mode-dependent macro and structure definitions. The
file is #included by pcre2_internal.h if PCRE2_CODE_UNIT_WIDTH is defined.
These mode-dependent items are kept in a separate file so that they can also be
#included multiple times for different code unit widths by pcre2test in order
to have access to the hidden structures at all supported widths.
#included multiple times for different code unit widths by pcre2test in order
to have access to the hidden structures at all supported widths.
Some of the mode-dependent macros are required at different widths for
different parts of the pcre2test code (in particular, the included
pcre_printint.c file). We undefine them here so that they can be re-defined for
multiple inclusions. Not all of these are used in pcre2test, but it's easier
multiple inclusions. Not all of these are used in pcre2test, but it's easier
just to undefine them all. */
#undef ACROSSCHAR
@ -93,7 +93,7 @@ request for an even bigger limit. For this reason, and also to make the code
easier to maintain, the storing and loading of offsets from the compiled code
unit string is now handled by the macros that are defined here.
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
The macros are controlled by the value of LINK_SIZE. This defaults to 2, but
values of 3 or 4 are also supported. */
/* ------------------- 8-bit support ------------------ */
@ -173,14 +173,14 @@ values of 2 or 4 are also supported. */
#else
#error Unsupported compiling mode
#endif
#endif
/* --------------- Other mode-specific macros ----------------- */
/* PCRE uses some other (at least) 16-bit quantities that do not change when
the size of offsets changes. These are used for repeat counts and for other
things such as capturing parenthesis numbers in back references.
things such as capturing parenthesis numbers in back references.
Define the number of code units required to hold a 16-bit count/offset, and
macros to load and store such a value. For reasons that I do not understand,
@ -196,7 +196,7 @@ arithmetic results in a signed value. Hence the cast. */
#else /* Code units are 16 or 32 bits */
#define IMM2_SIZE 1
#define GET2(a,n) a[n]
#define PUT2(a,n,d) a[n] = d
#define PUT2(a,n,d) a[n] = d
#endif
/* Other macros that are different for 8-bit mode. The MAX_255 macro checks
@ -346,7 +346,7 @@ because almost all calls are already within a block of UTF-8 only code. */
/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((eptr) & 0xc0) == 0x80) action
/* Deposit a character into memory, returning the number of code units. */
#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
@ -545,10 +545,10 @@ These are all no-ops since all UTF-32 characters fit into one pcre_uchar. */
/* ----------------------- HIDDEN STRUCTURES ----------------------------- */
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
/* NOTE: All these structures *must* start with a pcre2_memctl structure. The
code that uses them is simpler because it assumes this. */
/* The real general context structure. At present it holds only data for custom
/* The real general context structure. At present it holds only data for custom
memory control. */
typedef struct pcre2_real_general_context {
@ -572,9 +572,9 @@ typedef struct pcre2_real_match_context {
pcre2_memctl memctl;
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl;
#endif
#endif
int (*callout)(pcre2_callout_block *);
void *callout_data;
void *callout_data;
uint32_t match_limit;
uint32_t recursion_limit;
} pcre2_real_match_context;
@ -584,9 +584,9 @@ typedef struct pcre2_real_match_context {
typedef struct pcre2_real_code {
pcre2_memctl memctl; /* Memory control fields */
const uint8_t *tables; /* The character tables */
void *executable_jit; /* Pointer to JIT code */
void *executable_jit; /* Pointer to JIT code */
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
size_t blocksize; /* Total (bytes) that was malloc-ed */
size_t blocksize; /* Total (bytes) that was malloc-ed */
uint32_t magic_number; /* Paranoid and endianness check */
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
@ -596,10 +596,10 @@ typedef struct pcre2_real_code {
uint32_t first_codeunit; /* Starting code unit */
uint32_t last_codeunit; /* This codeunit must be seen */
uint16_t bsr_convention; /* What \R matches */
uint16_t newline_convention; /* What is a newline? */
uint16_t newline_convention; /* What is a newline? */
uint16_t max_lookbehind; /* Longest lookbehind (characters) */
uint16_t minlength; /* Minimum length of match */
uint16_t top_bracket; /* Highest numbered group */
uint16_t minlength; /* Minimum length of match */
uint16_t top_bracket; /* Highest numbered group */
uint16_t top_backref; /* Highest numbered back reference */
uint16_t name_entry_size; /* Size (code units) of table entries */
uint16_t name_count; /* Number of name entries in the table */
@ -614,10 +614,10 @@ typedef struct pcre2_real_match_data {
int rc; /* The return code from the match */
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
PCRE2_SIZE startchar; /* Offset to starting code unit */
PCRE2_SPTR mark; /* Pointer to last mark */
PCRE2_SIZE startchar; /* Offset to starting code unit */
PCRE2_SPTR mark; /* Pointer to last mark */
uint16_t oveccount; /* Number of pairs */
PCRE2_SIZE ovector[1]; /* The first field */
PCRE2_SIZE ovector[1]; /* The first field */
} pcre2_real_match_data;
@ -700,7 +700,7 @@ the system stack. */
typedef struct ovecsave_frame {
struct ovecsave_frame *next; /* Next frame on free chain */
PCRE2_SIZE saved_ovec[1]; /* First vector element */
} ovecsave_frame;
} ovecsave_frame;
/* Structure for items in a linked list that represents an explicit recursive
call within the pattern; used by pcre_match(). */
@ -738,7 +738,7 @@ typedef struct match_block {
pcre2_memctl memctl; /* For general use */
#ifdef HEAP_MATCH_RECURSE
pcre2_memctl stack_memctl; /* For "stack" frames */
#endif
#endif
uint32_t match_call_count; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_recursion; /* As it says */
@ -763,7 +763,7 @@ typedef struct match_block {
PCRE2_SPTR start_match_ptr; /* Start of matched string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR mark; /* Mark pointer to pass back on success */
PCRE2_SPTR nomatch_mark; /* Mark pointer to pass back on failure */
PCRE2_SPTR once_target; /* Where to back up to for atomic groups */
@ -778,7 +778,7 @@ typedef struct match_block {
PCRE2_UCHAR nl[4]; /* Newline string when fixed */
eptrblock *eptrchain; /* Chain of eptrblocks for tail recursions */
recursion_info *recursive; /* Linked list of recursion data */
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
ovecsave_frame *ovecsave_chain; /* Linked list of free ovecsave blocks */
void *callout_data; /* To pass back to callouts */
int (*callout)(pcre2_callout_block *); /* Callout function or NULL */
#ifdef HEAP_MATCH_RECURSE
@ -795,7 +795,7 @@ typedef struct dfa_match_block {
PCRE2_SPTR start_subject ; /* Start of the subject string */
PCRE2_SPTR end_subject; /* End of subject string */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
const uint8_t *tables; /* Character tables */
PCRE2_SIZE start_offset; /* The start offset value */
uint32_t moptions; /* Match options */

View File

@ -72,9 +72,9 @@ Arguments:
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points to a match context
jit_stack points to a JIT stack
match_data points to a match_data block
mcontext points to a match context
jit_stack points to a JIT stack
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough

View File

@ -60,9 +60,9 @@ compilation of dftables.c, in which case the macro DFTABLES is defined. */
/* This function builds a set of character tables for use by PCRE2 and returns
a pointer to them. They are built using the ctype functions, and consequently
their contents will depend upon the current locale setting. When compiled as
part of the library, the store is obtained via a general context malloc, if
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
program) malloc() is used, and the function has a different name so as not to
part of the library, the store is obtained via a general context malloc, if
supplied, but when DFTABLES is defined (when compiling the dftables auxiliary
program) malloc() is used, and the function has a different name so as not to
clash with the prototype in pcre2.h.
Arguments: none when DFTABLES is defined

View File

@ -56,7 +56,7 @@ POSSIBILITY OF SUCH DAMAGE.
(PCRE2_ANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT)
#define PUBLIC_JIT_MATCH_OPTIONS \
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_SOFT|PCRE2_PARTIAL_HARD)
@ -125,24 +125,24 @@ ovector length is always a multiple of 3. */
/* This function is called only when it is known that the offset lies within
the offsets that have so far been used in the match. Note that in caseless
UTF-8 mode, the number of subject bytes matched may be different to the number
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
of reference bytes. (In theory this could also happen in UTF-16 mode, but it
seems unlikely.)
Arguments:
offset index into the offset vector
offset_top top of the used offset vector
offset_top top of the used offset vector
eptr pointer into the subject
mb points to match block
caseless TRUE if caseless
lengthptr pointer for returning the length matched
lengthptr pointer for returning the length matched
Returns: = 0 successful match; number of code units matched is set
< 0 no match
> 0 partial match
> 0 partial match
*/
static int
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
match_ref(PCRE2_SIZE offset, PCRE2_SIZE offset_top, register PCRE2_SPTR eptr,
match_block *mb, BOOL caseless, PCRE2_SIZE *lengthptr)
{
#if defined SUPPORT_UNICODE
@ -153,7 +153,7 @@ register PCRE2_SPTR p;
PCRE2_SIZE length;
PCRE2_SPTR eptr_start = eptr;
/* Deal with an unset group. The default is no match, but there is an option to
/* Deal with an unset group. The default is no match, but there is an option to
match an empty string. */
if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
@ -164,7 +164,7 @@ if (offset >= offset_top || mb->ovector[offset] == PCRE2_UNSET)
return 0; /* Match */
}
else return -1; /* No match */
}
}
/* Separate the caseless and UTF cases for speed. */
@ -217,7 +217,7 @@ if (caseless)
if (eptr >= mb->end_subject) return 1; /* Partial match */
cc = UCHAR21TEST(eptr);
cp = UCHAR21TEST(p);
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
if (TABLE_GET(cp, mb->lcc, cp) != TABLE_GET(cc, mb->lcc, cc))
return -1; /* No match */
p++;
eptr++;
@ -345,7 +345,7 @@ argument of match(), which never changes. */
}
/* Structure for remembering the local variables in a private frame. Arrange it
/* Structure for remembering the local variables in a private frame. Arrange it
so as to minimize the number of holes. */
typedef struct heapframe {
@ -364,7 +364,7 @@ typedef struct heapframe {
PCRE2_SPTR Xpp;
PCRE2_SPTR Xprev;
PCRE2_SPTR Xsaved_eptr;
eptrblock *Xeptrb;
PCRE2_SIZE Xlength;
@ -377,7 +377,7 @@ typedef struct heapframe {
uint32_t Xrdepth;
uint32_t Xop;
uint32_t Xsave_capture_last;
#ifdef SUPPORT_UNICODE
uint32_t Xprop_value;
int Xprop_type;
@ -401,7 +401,7 @@ typedef struct heapframe {
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR Xocchars[6];
#endif
#endif
} heapframe;
#endif
@ -414,9 +414,9 @@ typedef struct heapframe {
/* When HEAP_MATCH_RECURSE is not defined, the match() function implements
backtrack points by calling itself recursively in all but one case. The one
special case is when processing OP_RECURSE, which specifies recursion in the
pattern. The entire ovector must be saved and restored while processing
OP_RECURSE. If the ovector is small enough, instead of calling match()
directly, op_recurse_ovecsave() is called. This function uses the system stack
pattern. The entire ovector must be saved and restored while processing
OP_RECURSE. If the ovector is small enough, instead of calling match()
directly, op_recurse_ovecsave() is called. This function uses the system stack
to save the ovector while calling match() to process the pattern recursion. */
#ifndef HEAP_MATCH_RECURSE
@ -425,7 +425,7 @@ to save the ovector while calling match() to process the pattern recursion. */
op_recurse_ovecsave(). */
static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth);
@ -433,7 +433,7 @@ match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
* Process OP_RECURSE, stacking ovector *
*************************************************/
/* When this function is called, mb->recursive has already been updated to
/* When this function is called, mb->recursive has already been updated to
point to a new recursion data block, and all its fields other than ovec_save
have been set.
@ -447,9 +447,9 @@ Arguments:
eptrb pointer to chain of blocks containing eptr at start of
brackets - for testing for empty matches
rdepth the recursion depth
Returns: a match() return code
*/
*/
static int
op_recurse_ovecsave(REGISTER PCRE2_SPTR eptr, PCRE2_SPTR callpat,
@ -472,7 +472,7 @@ data and the last captured value. */
do
{
if (cbegroup) mb->match_function_type = MATCH_CBEGROUP;
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
rrc = match(eptr, callpat + PRIV(OP_lengths)[*callpat], mstart, offset_top,
mb, eptrb, rdepth + 1);
memcpy(mb->ovector, new_recursive->ovec_save,
mb->offset_end * sizeof(PCRE2_SIZE));
@ -560,7 +560,7 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/
static int
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
match(REGISTER PCRE2_SPTR eptr, REGISTER PCRE2_SPTR ecode, PCRE2_SPTR mstart,
PCRE2_SIZE offset_top, match_block *mb, eptrblock *eptrb, uint32_t rdepth)
{
/* These variables do not need to be preserved over recursion in this function,
@ -1382,10 +1382,10 @@ for (;;)
case OP_FALSE:
break;
case OP_TRUE:
condition = TRUE;
break;
break;
/* The condition is an assertion. Call match() to evaluate it - setting
mb->match_function_type to MATCH_CONDASSERT causes it to stop at the end
@ -1475,7 +1475,7 @@ for (;;)
update the last used pointer. */
case OP_ASSERT_ACCEPT:
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
case OP_ACCEPT:
case OP_END:
@ -1735,7 +1735,7 @@ for (;;)
case OP_RECURSE:
{
ovecsave_frame *fr;
ovecsave_frame *fr;
recursion_info *ri;
uint32_t recno;
@ -1762,15 +1762,15 @@ for (;;)
ecode += 1 + LINK_SIZE;
/* When we are using the system stack for match() recursion we can call a
function that uses the system stack for preserving the ovector while
/* When we are using the system stack for match() recursion we can call a
function that uses the system stack for preserving the ovector while
processing the pattern recursion, but only if the ovector is small
enough. */
#ifndef HEAP_MATCH_RECURSE
if (mb->offset_end <= OP_RECURSE_STACK_SAVE_MAX)
{
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
rrc = op_recurse_ovecsave(eptr, callpat, mstart, offset_top, mb,
eptrb, rdepth);
mb->recursive = new_recursive.prevrec;
if (rrc != MATCH_MATCH && rrc != MATCH_ACCEPT) RRETURN(rrc);
@ -1785,10 +1785,10 @@ for (;;)
}
#endif
/* If the ovector is too big, or if we are using the heap for match()
recursion, we have to use the heap for saving the ovector. Used ovecsave
frames are kept on a chain and re-used. This makes a small improvement in
recursion, we have to use the heap for saving the ovector. Used ovecsave
frames are kept on a chain and re-used. This makes a small improvement in
execution time on Linux. */
if (mb->ovecsave_chain != NULL)
{
new_recursive.ovec_save = mb->ovecsave_chain->saved_ovec;
@ -1800,17 +1800,17 @@ for (;;)
mb->offset_end * sizeof(PCRE2_SIZE), mb->memctl.memory_data));
if (fr == NULL) RRETURN(PCRE2_ERROR_NOMEMORY);
new_recursive.ovec_save = fr->saved_ovec;
}
}
memcpy(new_recursive.ovec_save, mb->ovector,
mb->offset_end * sizeof(PCRE2_SIZE));
/* Do the recursion. After processing each alternative, restore the
ovector data and the last captured value. This code has the same overall
logic as the code in the op_recurse_ovecsave() function, but is adapted
to use RMATCH/RRETURN and to release the heap block containing the saved
ovector. */
cbegroup = (*callpat >= OP_SBRA);
do
{
@ -1821,51 +1821,51 @@ for (;;)
mb->offset_end * sizeof(PCRE2_SIZE));
mb->capture_last = new_recursive.saved_capture_last;
mb->recursive = new_recursive.prevrec;
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
{
fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr;
mb->ovecsave_chain = fr;
/* Set where we got to in the subject, and reset the start, in case
it was changed by \K. This *is* propagated back out of a recursion,
for Perl compatibility. */
eptr = mb->end_match_ptr;
mstart = mb->start_match_ptr;
goto RECURSION_MATCHED; /* Exit loop; end processing */
}
/* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
recursion; they cause a NOMATCH for the entire recursion. These codes
are defined in a range that can be tested for. */
if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
{
{
rrc = MATCH_NOMATCH;
goto RECURSION_RETURN;
goto RECURSION_RETURN;
}
/* Any return code other than NOMATCH is an error. */
if (rrc != MATCH_NOMATCH) goto RECURSION_RETURN;
mb->recursive = &new_recursive;
callpat += GET(callpat, 1);
}
while (*callpat == OP_ALT);
RECURSION_RETURN:
mb->recursive = new_recursive.prevrec;
fr = (ovecsave_frame *)
((uint8_t *)new_recursive.ovec_save - sizeof(ovecsave_frame *));
fr->next = mb->ovecsave_chain;
mb->ovecsave_chain = fr;
mb->ovecsave_chain = fr;
RRETURN(rrc);
}
RECURSION_MATCHED:
RECURSION_MATCHED:
break;
/* An alternation is the end of a branch; scan along to find the end of the
@ -1942,7 +1942,7 @@ for (;;)
mb->end_match_ptr = eptr; /* For ONCE_NC */
mb->end_offset_top = offset_top;
mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH); /* Sets mb->mark */
}
@ -1966,7 +1966,7 @@ for (;;)
{
mb->end_match_ptr = eptr;
mb->start_match_ptr = mstart;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_MATCH);
}
@ -2010,7 +2010,7 @@ for (;;)
mb->start_match_ptr = mstart; /* In case \K reset it */
mb->end_match_ptr = eptr;
mb->end_offset_top = offset_top;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
if (eptr > mb->last_used_ptr) mb->last_used_ptr = eptr;
RRETURN(MATCH_KETRPOS);
}
@ -2230,8 +2230,8 @@ for (;;)
else
{
PCRE2_SPTR nextptr = eptr + 1;
FORWARDCHARTEST(nextptr, mb->end_subject);
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
FORWARDCHARTEST(nextptr, mb->end_subject);
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
GETCHAR(c, eptr);
if ((mb->poptions & PCRE2_UCP) != 0)
{
@ -2282,7 +2282,7 @@ for (;;)
}
else
{
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
if (eptr >= mb->last_used_ptr) mb->last_used_ptr = eptr + 1;
#ifdef SUPPORT_UNICODE
if ((mb->poptions & PCRE2_UCP) != 0)
{
@ -2297,7 +2297,7 @@ for (;;)
#endif
cur_is_word = MAX_255(*eptr)
&& ((mb->ctypes[*eptr] & ctype_word) != 0);
}
}
}
/* Now see if the situation is what we want */
@ -2689,7 +2689,7 @@ for (;;)
/* Match a back reference, possibly repeatedly. Look past the end of the
item to see if there is repeat information following.
The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
or to a non-duplicated named group. For a duplicated named group, OP_DNREF
and OP_DNREFI are used. In this case we must scan the list of groups to
@ -2705,7 +2705,7 @@ for (;;)
/* Initializing 'offset' avoids a compiler warning in the REF_REPEAT
code. */
offset = 0;
while (count-- > 0)
{
@ -2721,7 +2721,7 @@ for (;;)
caseless = op == OP_REFI;
offset = GET2(ecode, 1) << 1; /* Doubled ref number */
ecode += 1 + IMM2_SIZE;
/* Set up for repetition, or handle the non-repeated case */
REF_REPEAT:
@ -2750,7 +2750,7 @@ for (;;)
break;
default: /* No repeat follows */
{
{
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &length);
if (rc != 0)
{
@ -2758,7 +2758,7 @@ for (;;)
CHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
}
}
eptr += length;
continue; /* With the main loop */
}
@ -2769,16 +2769,16 @@ for (;;)
also continue if PCRE2_MATCH_UNSET_BACKREF is set, because this makes an unset
group behave as a zero-length group. For any other unset cases, carrying
on will result in NOMATCH. */
if (offset < offset_top && mb->ovector[offset] != PCRE2_UNSET)
{
{
if (mb->ovector[offset] == mb->ovector[offset + 1]) continue;
}
else /* Group is not set */
{
if (min == 0 || (mb->poptions & PCRE2_MATCH_UNSET_BACKREF) != 0)
continue;
}
continue;
}
/* First, ensure the minimum number of matches are present. We get back
the length of the reference string explicitly rather than passing the
@ -2787,7 +2787,7 @@ for (;;)
for (i = 1; i <= min; i++)
{
PCRE2_SIZE slength;
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0)
{
if (rc > 0) eptr = mb->end_subject; /* Partial match */
@ -2808,13 +2808,13 @@ for (;;)
{
for (fi = min;; fi++)
{
int rc;
int rc;
PCRE2_SIZE slength;
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM14);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max) RRETURN(MATCH_NOMATCH);
rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0)
if (rc != 0)
{
if (rc > 0) eptr = mb->end_subject; /* Partial match */
CHECK_PARTIAL();
@ -2825,12 +2825,12 @@ for (;;)
/* Control never gets here */
}
/* If maximizing, find the longest string and work backwards, as long as
/* If maximizing, find the longest string and work backwards, as long as
the matched lengths for each iteration are the same. */
else
{
BOOL samelengths = TRUE;
BOOL samelengths = TRUE;
pp = eptr;
length = mb->ovector[offset+1] - mb->ovector[offset];
@ -2839,7 +2839,7 @@ for (;;)
PCRE2_SIZE slength;
int rc = match_ref(offset, offset_top, eptr, mb, caseless, &slength);
if (rc != 0)
if (rc != 0)
{
/* Can't use CHECK_PARTIAL because we don't want to update eptr in
the soft partial matching case. */
@ -2857,14 +2857,14 @@ for (;;)
eptr += slength;
}
/* If the length matched for each repetition is the same as the length of
the captured group, we can easily work backwards. This is the normal
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
/* If the length matched for each repetition is the same as the length of
the captured group, we can easily work backwards. This is the normal
case. However, in caseless UTF-8 mode there are pairs of case-equivalent
characters whose lengths (in terms of code units) differ. However, this
is very rare, so we handle it by re-matching fewer and fewer times. */
if (samelengths)
{
{
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM15);
@ -2872,20 +2872,20 @@ for (;;)
eptr -= length;
}
}
/* The rare case of non-matching lengths. Re-scan the repetition for each
/* The rare case of non-matching lengths. Re-scan the repetition for each
iteration. We know that match_ref() will succeed every time. */
else
{
max = i;
max = i;
for (;;)
{
RMATCH(eptr, ecode, offset_top, mb, eptrb, RM68);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (eptr == pp) break; /* Failed after minimal repetition */
eptr = pp;
max--;
max--;
for (i = min; i < max; i++)
{
PCRE2_SIZE slength;
@ -2893,8 +2893,8 @@ for (;;)
eptr += slength;
}
}
}
}
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
@ -6417,20 +6417,20 @@ with different endianness. */
if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE;
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
constructions like (*NO_AUTOPOSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
transferred to the options for this function. The bits are guaranteed to be
adjacent, but do not have the same values. This bit of Boolean trickery assumes
that the match-time bits are not more significant than the flag bits. If by
accident this is not the case, a compile-time division by zero error will
occur. */
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
#undef FF
@ -6541,7 +6541,7 @@ mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
mb->match_limit_recursion = (mcontext->recursion_limit < re->limit_recursion)?
mcontext->recursion_limit : re->limit_recursion;
/* Pointers to the individual character tables */
mb->lcc = re->tables + lcc_offset;
@ -6580,7 +6580,7 @@ switch(re->newline_convention)
default: return PCRE2_ERROR_INTERNAL;
}
/* If the expression has got more back references than the offsets supplied can
hold, we get a temporary chunk of memory to use during the matching. Otherwise,
we can use the vector supplied. The size of the ovector is three times the
@ -6854,7 +6854,7 @@ for(;;)
mb->start_match_ptr = start_match;
mb->start_used_ptr = start_match;
mb->last_used_ptr = start_match;
mb->last_used_ptr = start_match;
mb->match_call_count = 0;
mb->match_function_type = 0;
mb->end_offset_top = 0;
@ -6990,7 +6990,7 @@ while (mb->ovecsave_chain != NULL)
ovecsave_frame *this = mb->ovecsave_chain;
mb->ovecsave_chain = this->next;
mb->memctl.free(this, mb->memctl.memory_data);
}
}
/* Fill in fields that are always returned in the match data. */
@ -7057,9 +7057,9 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
match_data->ovector[0] = mb->start_match_ptr - mb->start_subject;
match_data->ovector[1] = mb->end_match_ptr - mb->start_subject;
}
/* Set the remaining returned values */
match_data->startchar = start_match - subject;
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
@ -7068,7 +7068,7 @@ if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
}
/* Control gets here if there has been a partial match, an error, or if the
overall match attempt has failed at all permitted starting positions. Any mark
overall match attempt has failed at all permitted starting positions. Any mark
data is in the nomatch_mark field. */
match_data->mark = mb->nomatch_mark;

View File

@ -72,10 +72,10 @@ return yield;
*************************************************/
PCRE2_EXP_DEFN pcre2_match_data * PCRE2_CALL_CONVENTION
pcre2_match_data_create_from_pattern(pcre2_code *code,
pcre2_match_data_create_from_pattern(pcre2_code *code,
pcre2_general_context *gcontext)
{
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
gcontext);
}
@ -88,7 +88,7 @@ return pcre2_match_data_create(((pcre2_real_code *)code)->top_bracket + 1,
PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
pcre2_match_data_free(pcre2_match_data *match_data)
{
if (match_data != NULL)
if (match_data != NULL)
match_data->memctl.free(match_data, match_data->memctl.memory_data);
}

View File

@ -60,9 +60,9 @@ http://unicode.org/unicode/reports/tr18/. */
* Check for newline at given position *
*************************************************/
/* This function is called only via the IS_NEWLINE macro, which does so only
/* This function is called only via the IS_NEWLINE macro, which does so only
when the newline type is NLTYPE_ANY or NLTYPE_ANYCRLF. The case of a fixed
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
newline (NLTYPE_FIXED) is handled inline. It is guaranteed that the code unit
pointed to by ptr is less than the end of the string.
Arguments:
@ -76,7 +76,7 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
PRIV(is_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR endptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
@ -90,15 +90,15 @@ c = *ptr;
if (type == NLTYPE_ANYCRLF) switch(c)
{
case CHAR_LF:
*lenptr = 1;
case CHAR_LF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
case CHAR_CR:
*lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
return TRUE;
default:
default:
return FALSE;
}
@ -111,8 +111,8 @@ else switch(c)
#endif
case CHAR_LF:
case CHAR_VT:
case CHAR_FF:
*lenptr = 1;
case CHAR_FF:
*lenptr = 1;
return TRUE;
case CHAR_CR:
@ -121,25 +121,25 @@ else switch(c)
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
*lenptr = utf? 2 : 1;
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
return TRUE;
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
default:
return FALSE;
}
}
@ -166,7 +166,7 @@ Returns: TRUE or FALSE
*/
BOOL
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
PRIV(was_newline)(PCRE2_SPTR ptr, uint32_t type, PCRE2_SPTR startptr,
uint32_t *lenptr, BOOL utf)
{
uint32_t c;
@ -190,11 +190,11 @@ if (type == NLTYPE_ANYCRLF) switch(c)
*lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
return TRUE;
case CHAR_CR:
*lenptr = 1;
case CHAR_CR:
*lenptr = 1;
return TRUE;
default:
default:
return FALSE;
}
@ -211,31 +211,31 @@ else switch(c)
#endif
case CHAR_VT:
case CHAR_FF:
case CHAR_CR:
*lenptr = 1;
case CHAR_CR:
*lenptr = 1;
return TRUE;
#ifndef EBCDIC
#if PCRE2_CODE_UNIT_WIDTH == 8
case CHAR_NEL:
*lenptr = utf? 2 : 1;
case CHAR_NEL:
*lenptr = utf? 2 : 1;
return TRUE;
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 3;
*lenptr = 3;
return TRUE;
#else /* 16-bit or 32-bit code units */
case CHAR_NEL:
case 0x2028: /* LS */
case 0x2029: /* PS */
*lenptr = 1;
return TRUE;
*lenptr = 1;
return TRUE;
#endif
#endif /* Not EBCDIC */
default:
default:
return FALSE;
}
}

View File

@ -89,17 +89,17 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_NAMECOUNT:
case PCRE2_INFO_NEWLINE:
case PCRE2_INFO_RECURSIONLIMIT:
return sizeof(uint32_t);
return sizeof(uint32_t);
case PCRE2_INFO_FIRSTBITMAP:
return sizeof(const uint8_t *);
case PCRE2_INFO_JITSIZE:
case PCRE2_INFO_SIZE:
return sizeof(size_t);
return sizeof(size_t);
case PCRE2_INFO_NAMETABLE:
return sizeof(PCRE2_SPTR);
return sizeof(PCRE2_SPTR);
}
}

View File

@ -41,8 +41,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
local functions. This source file is #included in pcre2test.c at each supported
code unit width, with PCRE2_SUFFIX set appropriately, just like the functions
that comprise the library. */
@ -82,9 +82,9 @@ Arguments:
f file to write to
ptr pointer to first code unit of the character
utf TRUE if string is UTF (will be FALSE if UTF is not supported)
Returns: number of additional code units used
*/
*/
static unsigned int
print_char(FILE *f, PCRE2_SPTR ptr, BOOL utf)
@ -105,7 +105,7 @@ if (utf)
one_code_unit = (c & 0xfffff800u) != 0xd800u;
#endif /* CODE_UNIT_WIDTH */
}
#endif /* SUPPORT_UNICODE */
#endif /* SUPPORT_UNICODE */
/* Handle a valid one-code-unit character at any width. */
@ -115,10 +115,10 @@ if (one_code_unit)
else if (c < 0x80) fprintf(f, "\\x%02x", c);
else fprintf(f, "\\x{%02x}", c);
return 0;
}
}
/* Code for invalid UTF code units and multi-unit UTF characters is different
for each width. If UTF is not supported, control should never get here, but we
/* Code for invalid UTF code units and multi-unit UTF characters is different
for each width. If UTF is not supported, control should never get here, but we
need a return statement to keep the compiler happy. */
#ifndef SUPPORT_UNICODE
@ -134,10 +134,10 @@ if ((c & 0xc0) != 0xc0)
{
fprintf(f, "\\X{%x}", c); /* Invalid starting byte */
return 0;
}
}
else
{
int i;
int i;
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */
int s = 6*a;
c = (c & utf8_table3[a]) << s;
@ -153,7 +153,7 @@ else
}
fprintf(f, "\\x{%x}", c);
return a;
}
}
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
/* UTF-16: rather than swallow a low surrogate, just stop if we hit a bad one.
@ -173,7 +173,7 @@ return 1;
/* For UTF-32 we get here only for a malformed code unit, which should only
occur if the sanity check has been turned off. Print it with \X instead of \x
as an indication. */
#if PCRE2_CODE_UNIT_WIDTH == 32
fprintf(f, "\\X{%x}", c);
return 0;
@ -187,15 +187,15 @@ return 0;
* Print string as a list of code units *
*************************************************/
/* This takes no account of UTF as it always prints each individual code unit.
/* This takes no account of UTF as it always prints each individual code unit.
The string is zero-terminated.
Arguments:
f file to write to
ptr point to the string
Returns: nothing
*/
*/
static void
print_custring(FILE *f, PCRE2_SPTR ptr)
@ -213,9 +213,9 @@ while (*ptr != '\0')
* Find Unicode property name *
*************************************************/
/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
/* When there is no UTF/UCP support, the table of names does not exist. This
function should not be called in such configurations, because a pattern that
tries to use Unicode properties won't compile. Rather than put lots of #ifdefs
into the main code, however, we just put one into this function. */
static const char *
@ -244,15 +244,15 @@ return "??";
/* "Normal" properties can be printed from tables. The PT_CLIST property is a
pseudo-property that contains a pointer to a list of case-equivalent
characters.
characters.
Arguments:
f file to write to
code pointer in the compiled code
before text to print before
after text to print after
Returns: nothing
Returns: nothing
*/
static void
@ -281,14 +281,14 @@ else
/* The print_lengths flag controls whether offsets and lengths of items are
printed. Lengths can be turned off from pcre2test so that automatic tests on
bytecode can be written that do not depend on the value of LINK_SIZE.
bytecode can be written that do not depend on the value of LINK_SIZE.
Arguments:
re a compiled pattern
f the file to write to
print_lengths show various lengths
Returns: nothing
print_lengths show various lengths
Returns: nothing
*/
static void
@ -460,7 +460,7 @@ for(;;)
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
fprintf(f, " %s ", flag);
if (*code >= OP_TYPESTAR)
{
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)

View File

@ -39,7 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
*/
/* This module contains internal functions for comparing and finding the length
of strings. These are used instead of strcmp() etc because the standard
of strings. These are used instead of strcmp() etc because the standard
functions work only on 8-bit data. */
@ -54,7 +54,7 @@ functions work only on 8-bit data. */
* Compare two zero-terminated PCRE2 strings *
*************************************************/
/*
/*
Arguments:
str1 first string
str2 second string
@ -80,7 +80,7 @@ return 0;
* Compare zero-terminated PCRE2 & 8-bit strings *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
@ -108,7 +108,7 @@ return 0;
* Compare two PCRE2 strings, given a length *
*************************************************/
/*
/*
Arguments:
str1 first string
str2 second string
@ -135,7 +135,7 @@ return 0;
* Compare PCRE2 string to 8-bit string by length *
*************************************************/
/* As the 8-bit string is almost always a literal, its type is specified as
/* As the 8-bit string is almost always a literal, its type is specified as
const char *.
Arguments:
@ -164,7 +164,7 @@ return 0;
* Find the length of a PCRE2 string *
*************************************************/
/*
/*
Argument: the string
Returns: the length
*/
@ -185,9 +185,9 @@ return c;
/* Arguments:
str1 buffer to receive the string
str2 8-bit string to be copied
Returns: the number of code units used (excluding trailing zero)
*/
*/
int
PRIV(strcpy_c8)(PCRE2_UCHAR *str1, const char *str2)

View File

@ -74,7 +74,7 @@ Arguments:
code pointer to start of group (the bracket)
startcode pointer to start of the whole pattern's code
recurse_depth RECURSE depth
utf UTF flag
utf UTF flag
Returns: the minimum length
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered
@ -388,10 +388,10 @@ for (;;)
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
PCRE2_UCHAR *slot =
PCRE2_UCHAR *slot =
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
GET2(cc, 1) * re->name_entry_size;
GET2(cc, 1) * re->name_entry_size;
d = INT_MAX;
while (count-- > 0)
{
@ -579,7 +579,7 @@ for (;;)
*************************************************/
/* Given a character, set its first code unit's bit in the table, and also the
corresponding bit for the other version of a letter if we are caseless.
corresponding bit for the other version of a letter if we are caseless.
Arguments:
re points to the regex block
@ -590,20 +590,20 @@ Arguments:
Returns: pointer after the character
*/
static PCRE2_SPTR
static PCRE2_SPTR
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
{
uint32_t c = *p++; /* First code unit */
(void)utf; /* Stop compiler warning when UTF not supported */
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
0xff. */
#if PCRE2_CODE_UNIT_WIDTH != 8
if (c > 0xff) SET_BIT(0xff); else
if (c > 0xff) SET_BIT(0xff); else
#endif
SET_BIT(c);
SET_BIT(c);
/* In UTF-8 or UTF-16 mode, pick up the remaining code units in order to find
the end of the character, even when caseless. */
@ -617,7 +617,7 @@ if (utf)
if ((c & 0xfc00) == 0xd800) GETUTF16INC(c, p);
#endif
}
#endif /* SUPPORT_UNICODE */
#endif /* SUPPORT_UNICODE */
/* If caseless, handle the other case of the character. */
@ -669,7 +669,7 @@ static void
set_type_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
register uint32_t c;
for (c = 0; c < table_limit; c++)
for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= re->tables[c+cbits_offset+cbit_type];
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit == 32) return;
@ -710,7 +710,7 @@ static void
set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
register uint32_t c;
for (c = 0; c < table_limit; c++)
for (c = 0; c < table_limit; c++)
re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
@ -724,10 +724,10 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
*************************************************/
/* This function scans a compiled unanchored expression recursively and
attempts to build a bitmap of the set of possible starting code units whose
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
attempts to build a bitmap of the set of possible starting code units whose
values are less than 256. In 16-bit and 32-bit mode, values above 255 all cause
the 255 bit to be set. When calling set[_not]_type_bits() in UTF-8 (sic) mode
we pass a value of 16 rather than 32 as the final argument. (See comments in
we pass a value of 16 rather than 32 as the final argument. (See comments in
those functions for the reason.)
The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
@ -769,8 +769,8 @@ do
while (try_next) /* Loop for items in this branch */
{
int rc;
uint8_t *classmap = NULL;
uint8_t *classmap = NULL;
switch(*tcode)
{
/* If we reach something we don't understand, it means a new opcode has
@ -854,31 +854,31 @@ do
case OP_THEN:
case OP_THEN_ARG:
return SSB_FAIL;
/* A "real" property test implies no starting bits, but the fake property
PT_CLIST identifies a list of characters. These lists are short, as they
are used for characters with more than one "other case", so there is no
point in recognizing them for OP_NOTPROP. */
case OP_PROP:
if (tcode[1] != PT_CLIST) return SSB_FAIL;
{
{
const uint32_t *p = PRIV(ucd_caseless_sets) + tcode[2];
while ((c = *p++) < NOTACHAR)
{
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf)
{
PCRE2_UCHAR buff[6];
(void)PRIV(ord2utf)(c, buff);
c = buff[0];
}
#endif
}
#endif
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
}
}
}
try_next = FALSE;
break;
break;
/* We can ignore word boundary tests. */
@ -1032,14 +1032,14 @@ do
SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for 0xA0 and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(0xA0);
SET_BIT(0xFF);
#else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */
#ifdef SUPPORT_UNICODE
@ -1052,7 +1052,7 @@ do
}
else
#endif
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
the code is EBCDIC. */
{
#ifndef EBCDIC
@ -1060,7 +1060,7 @@ do
#endif /* Not EBCDIC */
}
#endif /* 8-bit support */
try_next = FALSE;
break;
@ -1071,16 +1071,16 @@ do
SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for NEL and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(CHAR_NEL);
SET_BIT(0xFF);
#else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */
#ifdef SUPPORT_UNICODE
if (utf)
{
@ -1093,8 +1093,8 @@ do
{
SET_BIT(CHAR_NEL);
}
#endif /* 8-bit support */
#endif /* 8-bit support */
try_next = FALSE;
break;
@ -1166,7 +1166,7 @@ do
case OP_ANY:
case OP_ALLANY:
return SSB_FAIL;
case OP_HSPACE:
SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE);
@ -1178,7 +1178,7 @@ do
SET_BIT(0xA0);
SET_BIT(0xFF);
#else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of horizontal space characters. */
#ifdef SUPPORT_UNICODE
@ -1191,7 +1191,7 @@ do
}
else
#endif
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
/* For the 8-bit library not in UTF-8 mode, set the bit for 0xA0, unless
the code is EBCDIC. */
{
#ifndef EBCDIC
@ -1208,16 +1208,16 @@ do
SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR);
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
/* For the 16-bit and 32-bit libraries (which can never be EBCDIC), set
the bits for NEL and for code units >= 255, independently of UTF. */
#if PCRE2_CODE_UNIT_WIDTH != 8
SET_BIT(CHAR_NEL);
SET_BIT(0xFF);
#else
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
/* For the 8-bit library in UTF-8 mode, set the bits for the first code
units of vertical space characters. */
#ifdef SUPPORT_UNICODE
if (utf)
{
@ -1230,7 +1230,7 @@ do
{
SET_BIT(CHAR_NEL);
}
#endif /* 8-bit support */
#endif /* 8-bit support */
break;
case OP_NOT_DIGIT:
@ -1260,8 +1260,8 @@ do
tcode += 2;
break;
/* Extended class: if there are any property checks, or if this is a
/* Extended class: if there are any property checks, or if this is a
negative XCLASS without a map, give up. If there are no property checks,
there must be wide characters on the XCLASS list, because otherwise an
XCLASS would not have been created. This means that code points >= 255
@ -1270,19 +1270,19 @@ do
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0 ||
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
(tcode[1 + LINK_SIZE] & (XCL_MAP|XCL_NOT)) == XCL_NOT)
return SSB_FAIL;
/* We have a positive XCLASS or a negative one with a map. Set up the
map pointer if there is one, and fall through. */
classmap = ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0)? NULL :
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
#endif
/* Enter here for a negative non-XCLASS. In the 8-bit library, if we are
in UTF mode, any byte with a value >= 0xc4 is a potentially valid starter
because it starts a character with a value > 255. In 8-bit non-UTF mode,
because it starts a character with a value > 255. In 8-bit non-UTF mode,
there is no difference between CLASS and NCLASS. In all other wide
character modes, set the 0xFF bit to indicate code units >= 255. */
@ -1298,26 +1298,26 @@ do
#endif
/* Fall through */
/* Enter here for a positive non-XCLASS. If we have fallen through from
an XCLASS, classmap will already be set; just advance the code pointer.
/* Enter here for a positive non-XCLASS. If we have fallen through from
an XCLASS, classmap will already be set; just advance the code pointer.
Otherwise, set up classmap for a non-XCLASS and advance past it. */
case OP_CLASS:
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
{
{
classmap = (uint8_t *)(++tcode);
tcode += 32 / sizeof(PCRE2_UCHAR);
}
/* When wide characters are supported, classmap may be NULL. In UTF-8
(sic) mode, the bits in a class bit map correspond to character values,
not to byte values. However, the bit map we are constructing is for byte
values. So we have to do a conversion for characters whose code point is
values. So we have to do a conversion for characters whose code point is
greater than 127. In fact, there are only two possible starting bytes for
characters in the range 128 - 255. */
if (classmap != NULL)
{
{
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (utf)
{
@ -1334,11 +1334,11 @@ do
}
else
#endif
/* In all modes except UTF-8, the two bit maps are compatible. */
/* In all modes except UTF-8, the two bit maps are compatible. */
{
for (c = 0; c < 32; c++) re->start_bitmap[c] |= classmap[c];
}
}
}
/* Act on what follows the class. For a zero minimum repeat, continue;
@ -1384,13 +1384,13 @@ return yield;
*************************************************/
/* This function is handed a compiled expression that it must study to produce
information that will speed up the matching.
information that will speed up the matching.
Argument: points to the compiled expression
Returns: 0 normally; non-zero should never normally occur
1 unknown opcode in set_start_bits
2 missing capturing bracket
3 unknown opcode in find_minlength
3 unknown opcode in find_minlength
*/
int
@ -1402,7 +1402,7 @@ BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
/* Find start of compiled code */
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count;
/* For an anchored pattern, or an unanchored pattern that has a first code
@ -1422,17 +1422,17 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 &&
switch(min = find_minlength(re, code, code, 0, utf))
{
case -1: /* \C in UTF mode or (*ACCEPT) was encountered */
break;
break;
case -2:
return 2; /* missing capturing bracket */
case -3:
return 3; /* unrecognized opcode */
default:
re->minlength = min;
break;
break;
}
return 0;

View File

@ -81,7 +81,7 @@ for (entry = first; entry <= last; entry += entrysize)
{
uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
}
return PCRE2_ERROR_NOSUBSTRING;
}
@ -108,7 +108,7 @@ Returns: if successful: 0
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
unsigned int stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE left, right;
@ -119,7 +119,7 @@ if (stringnumber >= match_data->oveccount ||
(left = match_data->ovector[stringnumber*2]) == PCRE2_UNSET)
return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1];
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
if (right - left + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
while (left < right) buffer[p++] = subject[left++];
buffer[p] = 0;
*sizeptr = p;
@ -140,7 +140,7 @@ Arguments:
match_data pointer to match_data
stringname the name of the required substring
stringptr where to put the pointer to the new memory
sizeptr where to put the length of the substring
sizeptr where to put the length of the substring
Returns: if successful: zero
if not successful, a negative value:
@ -162,7 +162,7 @@ for (entry = first; entry <= last; entry += entrysize)
{
uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
}
return PCRE2_ERROR_NOSUBSTRING;
}
@ -180,7 +180,7 @@ Arguments:
match_data points to match data
stringnumber the number of the required substring
stringptr where to put a pointer to the new memory
sizeptr where to put the size of the substring
sizeptr where to put the size of the substring
Returns: if successful: zero
if not successful a negative error code:
@ -189,7 +189,7 @@ Returns: if successful: zero
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
pcre2_substring_get_bynumber(pcre2_match_data *match_data,
unsigned int stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
{
PCRE2_SIZE left, right;
@ -204,8 +204,8 @@ if (stringnumber >= match_data->oveccount ||
return PCRE2_ERROR_NOSUBSTRING;
right = match_data->ovector[stringnumber*2+1];
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
block = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
(right-left+1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
if (block == NULL) return PCRE2_ERROR_NOMEMORY;
yield = (PCRE2_UCHAR *)((char *)block + sizeof(pcre2_memctl));
@ -222,7 +222,7 @@ return 0;
* Free memory obtained by get_substring *
*************************************************/
/*
/*
Argument: the result of a previous pcre2_substring_get_byxxx()
Returns: nothing
*/
@ -246,7 +246,7 @@ permits duplicate names, the first substring that is set is chosen.
Arguments:
match_data pointer to match data
stringname the name of the required substring
sizeptr where to put the length
sizeptr where to put the length
Returns: 0 if successful, else a negative error number
*/
@ -265,7 +265,7 @@ for (entry = first; entry <= last; entry += entrysize)
{
uint16_t n = GET2(entry, 0);
if (n < match_data->oveccount && match_data->ovector[n*2] != PCRE2_UNSET)
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
}
return PCRE2_ERROR_NOSUBSTRING;
}
@ -281,7 +281,7 @@ return PCRE2_ERROR_NOSUBSTRING;
Arguments:
match_data pointer to match data
stringnumber the number of the required substring
sizeptr where to put the length
sizeptr where to put the length
Returns: 0 if successful, else a negative error number
*/
@ -296,7 +296,7 @@ if (stringnumber >= match_data->oveccount ||
return PCRE2_ERROR_NOSUBSTRING;
*sizeptr = match_data->ovector[stringnumber*2 + 1] -
match_data->ovector[stringnumber*2];
return 0;
return 0;
}
@ -307,23 +307,23 @@ return 0;
/* This function gets one chunk of memory and builds a list of pointers and all
the captured substrings in it. A NULL pointer is put on the end of the list.
The substrings are zero-terminated, but also, if the final argument is
non-NULL, a list of lengths is also returned. This allows binary data to be
The substrings are zero-terminated, but also, if the final argument is
non-NULL, a list of lengths is also returned. This allows binary data to be
handled.
Arguments:
match_data points to the match data
listptr set to point to the list of pointers
lengthsptr set to point to the list of lengths (may be NULL)
lengthsptr set to point to the list of lengths (may be NULL)
Returns: if successful: 0
if not successful, a negative error code:
PCRE2_ERROR_NOMEMORY: failed to get memory,
or a match failure code
or a match failure code
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
PCRE2_SIZE **lengthsptr)
{
int i, count, count2;
@ -343,22 +343,22 @@ if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
for (i = 0; i < count2; i += 2)
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(ovector[i+1] - ovector[i] + 1);
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
*listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
if (lengthsptr == NULL)
{
sp = (PCRE2_UCHAR *)lensp;
sp = (PCRE2_UCHAR *)lensp;
lensp = NULL;
}
else
{
*lengthsptr = lensp;
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
}
{
*lengthsptr = lensp;
sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
}
for (i = 0; i < count2; i += 2)
{
@ -398,9 +398,9 @@ memctl->free(memctl, memctl->memory_data);
* Find (multiple) entries for named string *
*************************************************/
/* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a group with the given name. If duplicate names are
/* This function scans the nametable for a given name, using binary chop. It
returns either two pointers to the entries in the table, or, if no pointers are
given, the number of a group with the given name. If duplicate names are
permitted, this may not be unique.
Arguments:
@ -428,11 +428,11 @@ while (top > bot)
uint16_t mid = (top + bot) / 2;
PCRE2_SPTR entry = nametable + entrysize*mid;
int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
if (c == 0)
if (c == 0)
{
PCRE2_SPTR first;
PCRE2_SPTR last;
PCRE2_SPTR lastentry;
PCRE2_SPTR lastentry;
if (firstptr == NULL) return GET2(entry, 0);
lastentry = nametable + entrysize * (code->name_count - 1);
first = last = entry;
@ -447,7 +447,7 @@ while (top > bot)
last += entrysize;
}
*firstptr = first;
*lastptr = last;
*lastptr = last;
return entrysize;
}
if (c > 0) bot = mid + 1; else top = mid;
@ -462,7 +462,7 @@ return PCRE2_ERROR_NOSUBSTRING;
*************************************************/
/* This function is a convenience wrapper for pcre2_substring_nametable_scan()
when it is known that names are unique. If there are duplicate names, it is not
when it is known that names are unique. If there are duplicate names, it is not
defined which number is returned.
Arguments:
@ -474,7 +474,7 @@ Returns: the number of the named parenthesis, or a negative number
*/
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_substring_number_from_name(const pcre2_code *code,
pcre2_substring_number_from_name(const pcre2_code *code,
PCRE2_SPTR stringname)
{
return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);

View File

@ -232,7 +232,7 @@ enum {
ucp_Takri,
/* New for Unicode 7.0.0: */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,

View File

@ -154,11 +154,11 @@ for (p = string; length-- > 0; p++)
*erroroffset = (int)(p - string);
switch(ab - length)
{
case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5;
case 1: return PCRE2_ERROR_UTF8_ERR1;
case 2: return PCRE2_ERROR_UTF8_ERR2;
case 3: return PCRE2_ERROR_UTF8_ERR3;
case 4: return PCRE2_ERROR_UTF8_ERR4;
case 5: return PCRE2_ERROR_UTF8_ERR5;
}
}
length -= ab; /* Length remaining */
@ -314,7 +314,7 @@ return 0;
/* ----------------- Check a UTF-16 string ----------------- */
#elif PCRE2_CODE_UNIT_WIDTH == 16
#elif PCRE2_CODE_UNIT_WIDTH == 16
/* There's not so much work, nor so many errors, for UTF-16.
PCRE2_ERROR_UTF16_ERR1 Missing low surrogate at the end of the string

View File

@ -60,7 +60,7 @@ might contain codepoints above 255 and/or Unicode properties.
Arguments:
c the character
data points to the flag code unit of the XCLASS data
utf TRUE if in UTF mode
utf TRUE if in UTF mode
Returns: TRUE if character matches, else FALSE
*/
@ -261,7 +261,7 @@ while ((t = *data++) != XCL_END)
data += 2;
}
#else
(void)utf; /* Avoid compiler warning */
(void)utf; /* Avoid compiler warning */
#endif /* SUPPORT_UNICODE */
}

View File

@ -8,7 +8,7 @@ pcre2sample documentation for a short discussion ("man pcre2sample" if you have
the PCRE2 man pages installed). PCRE2 is a revised API for the library, and is
incompatible with the original PCRE API.
There are actually three libraries, each supporting a different code unit
There are actually three libraries, each supporting a different code unit
width. This demonstration program uses the 8-bit library.
In Unix-like environments, if PCRE2 is installed in your standard system
@ -39,8 +39,8 @@ the following line. */
/* #define PCRE2_STATIC */
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
/* This macro must be defined before including pcre2.h. For a program that uses
only one code unit width, it makes it possible to use generic function names
such as pcre2_compile(). */
#define PCRE2_CODE_UNIT_WIDTH 8
@ -124,7 +124,7 @@ subject_length = strlen((char *)subject);
re = pcre2_compile(
pattern, /* the pattern */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */
0, /* default options */
&errornumber, /* for error number */
&erroroffset, /* for error offset */
@ -134,9 +134,9 @@ re = pcre2_compile(
if (re == NULL)
{
PCRE2_UCHAR buffer[256];
PCRE2_UCHAR buffer[256];
pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
buffer);
return 1;
}
@ -180,7 +180,7 @@ if (rc < 0)
return 1;
}
/* Match succeeded. Get a pointer to the output vector, where string offsets are
/* Match succeeded. Get a pointer to the output vector, where string offsets are
stored. */
ovector = pcre2_get_ovector_pointer(match_data);
@ -193,7 +193,7 @@ printf("\nMatch succeeded at offset %d\n", (int)ovector[0]);
* captured. *
*************************************************************************/
/* The output vector wasn't big enough. This should not happen, because we used
/* The output vector wasn't big enough. This should not happen, because we used
pcre2_match_data_create_from_pattern() above. */
if (rc == 0)
@ -244,7 +244,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
&name_entry_size); /* where to put the answer */
/* Now we can scan the table and, for each entry, print the number, the name,
and the substring itself. In the 8-bit library the number is held in two
and the substring itself. In the 8-bit library the number is held in two
bytes, most significant first. */
tabptr = name_table;
@ -289,7 +289,7 @@ if (namecount <= 0) printf("No named substrings\n"); else
if (!find_all) /* Check for -g */
{
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_match_data_free(match_data); /* Release the memory that was used */
pcre2_code_free(re); /* for the match data and the pattern. */
return 0; /* Exit the program. */
}
@ -307,7 +307,7 @@ sequence. */
(void)pcre2_pattern_info(re, PCRE2_INFO_NEWLINE, &newline);
crlf_is_newline = newline == PCRE2_NEWLINE_ANY ||
newline == PCRE2_NEWLINE_CRLF ||
newline == PCRE2_NEWLINE_ANYCRLF;
newline == PCRE2_NEWLINE_ANYCRLF;
/* Loop for second and subsequent matches */

View File

@ -450,7 +450,7 @@ pcre2grep_exit(int rc)
if (resource_error)
{
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
PCRE2_ERROR_RECURSIONLIMIT);
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
}
@ -485,7 +485,7 @@ if (strlen(s) > MAXPATLEN)
{
fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
MAXPATLEN);
free(p);
free(p);
return NULL;
}
p->next = NULL;
@ -2381,7 +2381,7 @@ switch(letter)
unsigned char buffer[128];
(void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
fprintf(stdout, "pcre2grep version %s\n", buffer);
}
}
pcre2grep_exit(0);
break;
@ -2472,7 +2472,7 @@ if ((popts & PO_FIXED_STRINGS) != 0)
}
sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
p->compiled = pcre2_compile(buffer, -1, options, &errcode, &erroffset,
compile_context);
if (p->compiled != NULL) return TRUE;
@ -2555,11 +2555,11 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
afterwards, as a precaution against any later code trying to use it. */
*patlastptr = add_pattern(buffer, *patlastptr);
if (*patlastptr == NULL)
if (*patlastptr == NULL)
{
if (f != stdin) fclose(f);
return FALSE;
}
}
if (*patptr == NULL) *patptr = *patlastptr;
/* This loop is needed because compiling a "pattern" when -F is set may add
@ -2571,10 +2571,10 @@ while (fgets(buffer, PATBUFSIZE, f) != NULL)
{
if (!compile_pattern(*patlastptr, pcre2_options, popts, TRUE, filename,
linenumber))
{
{
if (f != stdin) fclose(f);
return FALSE;
}
}
(*patlastptr)->string = NULL; /* Insurance */
if ((*patlastptr)->next == NULL) break;
*patlastptr = (*patlastptr)->next;
@ -2622,7 +2622,7 @@ for (i = 1; i < argc; i++)
char *option_data = (char *)""; /* default to keep compiler happy */
BOOL longop;
BOOL longopwasequals = FALSE;
if (argv[i][0] != '-') break;
/* If we hit an argument that is just "-", it may be a reference to STDIN,
@ -2925,7 +2925,7 @@ for (i = 1; i < argc; i++)
else *((int *)op->dataptr) = n;
}
}
/* Options have been decoded. If -C was used, its value is used as a default
for -A and -B. */
@ -2946,15 +2946,15 @@ if ((only_matching != NULL && (file_offsets || line_offsets)) ||
"and/or --line-offsets\n");
pcre2grep_exit(usage(2));
}
/* Put limits into the match data block. */
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (recursion_limit > 0) pcre2_set_recursion_limit(match_context, recursion_limit);
if (only_matching != NULL || file_offsets || line_offsets)
show_only_matching = TRUE;
/* If a locale has not been provided as an option, see if the LC_CTYPE or
LC_ALL environment variable is set, and if so, use it. */
@ -2980,7 +2980,7 @@ if (locale != NULL)
locale, locale_from);
goto EXIT2;
}
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
pcre2_set_character_tables(compile_context, pcre2_maketables(NULL));
}
/* Sort out colouring */
@ -3007,27 +3007,27 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
if (newline_arg != NULL)
{
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
endlinetype++)
{
if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
}
if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
pcre2_set_newline(compile_context, endlinetype);
else
else
{
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
newline_arg);
goto EXIT2;
}
}
}
/* Find default newline convention */
/* Find default newline convention */
else
{
(void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
}
}
/* Interpret the text values for -d and -D */

View File

@ -68,7 +68,7 @@ already set. */
#include "pcre2_internal.h"
#include "pcre2posix.h"
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
/* Table to translate PCRE2 compile time error codes into POSIX error codes.
Only a few PCRE2 errors with a value greater than 23 turn into special POSIX
codes: most go to REG_BADPAT. The second table lists, in pairs, those that
don't. */
@ -89,7 +89,7 @@ static const int eint1[] = {
REG_ASSERT, /* internal error: unexpected repeat */
REG_BADPAT, /* unrecognized character after (? or (?- */
REG_BADPAT, /* POSIX named classes are supported only within a class */
REG_BADPAT, /* POSIX collating elements are not supported */
REG_BADPAT, /* POSIX collating elements are not supported */
REG_EPAREN, /* missing ) */
/* 15 */
REG_ESUBREG, /* reference to non-existent subpattern */
@ -103,7 +103,7 @@ static const int eint1[] = {
REG_EPAREN, /* unmatched closing parenthesis */
REG_ASSERT /* internal error: code overflow */
};
static const int eint2[] = {
30, REG_ECTYPE, /* unknown POSIX class name */
32, REG_INVARG, /* this version of PCRE does not have UTF or UCP support */
@ -216,14 +216,14 @@ if ((cflags & REG_UTF) != 0) options |= PCRE2_UTF;
if ((cflags & REG_UCP) != 0) options |= PCRE2_UCP;
if ((cflags & REG_UNGREEDY) != 0) options |= PCRE2_UNGREEDY;
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
preg->re_pcre2_code = pcre2_compile((PCRE2_SPTR)pattern, -1, options,
&errorcode, &erroffset, NULL);
preg->re_erroffset = erroffset;
if (preg->re_pcre2_code == NULL)
{
unsigned int i;
if (errorcode < 0) return REG_BADPAT; /* UTF error */
unsigned int i;
if (errorcode < 0) return REG_BADPAT; /* UTF error */
errorcode -= COMPILE_ERROR_BASE;
if (errorcode < (int)(sizeof(eint1)/sizeof(const int)))
return eint1[errorcode];
@ -232,7 +232,7 @@ if (preg->re_pcre2_code == NULL)
return REG_BADPAT;
}
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
(void)pcre2_pattern_info((const pcre2_code *)preg->re_pcre2_code,
PCRE2_INFO_CAPTURECOUNT, &re_nsub);
preg->re_nsub = (size_t)re_nsub;
if ((options & PCRE2_NO_AUTO_CAPTURE) != 0) re_nsub = -1;
@ -288,7 +288,7 @@ else
eo = (int)strlen(string);
}
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
rc = pcre2_match((const pcre2_code *)preg->re_pcre2_code,
(PCRE2_SPTR)string + so, (eo - so), 0, options, md, NULL);
/* Successful match */

View File

@ -95,7 +95,7 @@ enum {
typedef struct {
void *re_pcre2_code;
void *re_match_data;
void *re_match_data;
size_t re_nsub;
size_t re_erroffset;
} regex_t;

View File

@ -4797,9 +4797,9 @@ for (gmatched = 0;; gmatched++)
PCRE2_SIZE length;
uint32_t copybuffer[256];
int namelen = strlen((const char *)nptr);
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
PCRE2_SIZE cnl = namelen;
#endif
#endif
if (namelen == 0) break;
#ifdef SUPPORT_PCRE2_8
@ -4864,9 +4864,9 @@ for (gmatched = 0;; gmatched++)
void *gotbuffer;
int rc;
int namelen = strlen((const char *)nptr);
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
#if defined SUPPORT_PCRE2_16 || defined SUPPORT_PCRE2_32
PCRE2_SIZE cnl = namelen;
#endif
#endif
if (namelen == 0) break;
#ifdef SUPPORT_PCRE2_8
@ -5389,25 +5389,25 @@ if (PO(options) != DO(options) || PO(control) != DO(control))
return 1;
}
/* Get the PCRE2 and Unicode version number and JIT target information, at the
same time checking that a request for the length gives the same answer. Also
/* Get the PCRE2 and Unicode version number and JIT target information, at the
same time checking that a request for the length gives the same answer. Also
check lengths for non-string items. */
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_VERSION, version) ||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE_VERSION, uversion) ||
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, NULL) !=
PCRE2_CONFIG(PCRE2_CONFIG_JITTARGET, jittarget) ||
PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(int) ||
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(long int))
{
fprintf(stderr, "** Error in pcre2_config(): bad length\n");
return 1;
}
}
/* Get buffers from malloc() so that valgrind will check their misuse when
debugging. They grow automatically when very long lines are read. The 16-