More file tidies for 10.33-RC1
This commit is contained in:
parent
02ff543f9c
commit
7375089fa5
|
@ -88,7 +88,7 @@
|
|||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
# Increased minimum to 2.8.0 to support newer add_test features.
|
||||
# Increased minimum to 2.8.0 to support newer add_test features.
|
||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||
|
||||
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||
|
@ -324,7 +324,7 @@ ENDIF(PCRE2_SUPPORT_VALGRIND)
|
|||
|
||||
IF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
SET(DISABLE_PERCENT_ZT 1)
|
||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
|
||||
# This next one used to reference ${READLINE_LIBRARY})
|
||||
# but I was advised to add the NCURSES test as well, along with
|
||||
|
@ -459,7 +459,7 @@ SET(PCRE2_SOURCES
|
|||
src/pcre2_newline.c
|
||||
src/pcre2_ord2utf.c
|
||||
src/pcre2_pattern_info.c
|
||||
src/pcre2_script_run.c
|
||||
src/pcre2_script_run.c
|
||||
src/pcre2_serialize.c
|
||||
src/pcre2_string_utils.c
|
||||
src/pcre2_study.c
|
||||
|
@ -651,10 +651,10 @@ IF(PCRE2_BUILD_TESTS)
|
|||
|
||||
# exes in Debug location tested by the RunTest and RunGrepTest shell scripts
|
||||
# via "make test"
|
||||
|
||||
|
||||
# The commented out code below provokes a warning about future removal
|
||||
# of the facility, and requires policy CMP0026 to be set to "OLD". I have
|
||||
# got fed-up with the warnings, but my plea for help on the mailing list
|
||||
# got fed-up with the warnings, but my plea for help on the mailing list
|
||||
# produced no response. So, I've hacked. The new code below seems to work on
|
||||
# Linux.
|
||||
|
||||
|
@ -857,9 +857,9 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
|
||||
IF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
MESSAGE(STATUS " Use %zu and %td ..................: OFF" )
|
||||
ELSE(PCRE2_DISABLE_PERCENT_ZT)
|
||||
ELSE(PCRE2_DISABLE_PERCENT_ZT)
|
||||
MESSAGE(STATUS " Use %zu and %td ..................: AUTO" )
|
||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||
|
||||
IF(MINGW AND NOT PCRE2_STATIC)
|
||||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||
|
|
86
ChangeLog
86
ChangeLog
|
@ -14,11 +14,11 @@ a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
|
|||
|
||||
3. Added support for callouts from pcre2_substitute().
|
||||
|
||||
4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
|
||||
4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
|
||||
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
|
||||
names are defined as macros. This should help avoid linking with the wrong
|
||||
library in some environments while still exporting the POSIX names for
|
||||
pre-existing programs that use them. (The Debian alternative names are also
|
||||
pre-existing programs that use them. (The Debian alternative names are also
|
||||
defined as macros, but not documented.)
|
||||
|
||||
5. Fix an xclass matching issue in JIT.
|
||||
|
@ -33,29 +33,29 @@ new "is lower case letter" bit. At the same time, the now unused "is
|
|||
hexadecimal digit" bit was removed. The default tables in
|
||||
src/pcre2_chartables.c.dist are updated.
|
||||
|
||||
8. Implement the new Perl "script run" features (*script_run:...) and
|
||||
8. Implement the new Perl "script run" features (*script_run:...) and
|
||||
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
|
||||
|
||||
9. Fixed two typos in change 22 for 10.21, which added special handling for
|
||||
ranges such as a-z in EBCDIC environments. The original code probably never
|
||||
worked, though there were no bug reports.
|
||||
ranges such as a-z in EBCDIC environments. The original code probably never
|
||||
worked, though there were no bug reports.
|
||||
|
||||
10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via
|
||||
pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast
|
||||
path. Also, when a match fails, set the subject field in the match data to NULL
|
||||
for tidiness - none of the substring extractors should reference this after
|
||||
path. Also, when a match fails, set the subject field in the match data to NULL
|
||||
for tidiness - none of the substring extractors should reference this after
|
||||
match failure.
|
||||
|
||||
11. If a pattern started with a subroutine call that had a quantifier with a
|
||||
minimum of zero, an incorrect "match must start with this character" could be
|
||||
recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to
|
||||
be the first character of a match.
|
||||
11. If a pattern started with a subroutine call that had a quantifier with a
|
||||
minimum of zero, an incorrect "match must start with this character" could be
|
||||
recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to
|
||||
be the first character of a match.
|
||||
|
||||
12. The heap limit checking code in pcre2_dfa_match() could suffer from
|
||||
overflow if the heap limit was set very large. This could cause incorrect "heap
|
||||
limit exceeded" errors.
|
||||
12. The heap limit checking code in pcre2_dfa_match() could suffer from
|
||||
overflow if the heap limit was set very large. This could cause incorrect "heap
|
||||
limit exceeded" errors.
|
||||
|
||||
13. Add "kibibytes" to the heap limit output from pcre2test -C to make the
|
||||
13. Add "kibibytes" to the heap limit output from pcre2test -C to make the
|
||||
units clear.
|
||||
|
||||
14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness.
|
||||
|
@ -71,33 +71,33 @@ inttypes.h. This supports environments that do not have stdint.h but do have
|
|||
inttypes.h, which are known to exist. A note in the autotools documentation
|
||||
says (November 2018) that there are none known that are the other way round.
|
||||
|
||||
17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to
|
||||
forcibly disable the use of %zu and %td in formatting strings because there is
|
||||
at least one version of VMS that claims to be C99 but does not support these
|
||||
17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to
|
||||
forcibly disable the use of %zu and %td in formatting strings because there is
|
||||
at least one version of VMS that claims to be C99 but does not support these
|
||||
modifiers.
|
||||
|
||||
18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
|
||||
in pcre2grep to the inbuilt echo facility. This may be useful in environments
|
||||
18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
|
||||
in pcre2grep to the inbuilt echo facility. This may be useful in environments
|
||||
that do not support fork().
|
||||
|
||||
19. Fix two instances of <= 0 being applied to unsigned integers (the VMS
|
||||
19. Fix two instances of <= 0 being applied to unsigned integers (the VMS
|
||||
compiler complains).
|
||||
|
||||
20. Added "fork" support for VMS to pcre2grep, for running an external program
|
||||
20. Added "fork" support for VMS to pcre2grep, for running an external program
|
||||
via a string callout.
|
||||
|
||||
21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel.
|
||||
|
||||
22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN)
|
||||
22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN)
|
||||
followed by ^ it was not recognized as anchored.
|
||||
|
||||
23. The RunGrepTest script used to cut out the test of NUL characters for
|
||||
Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD
|
||||
systems can't either. I've inverted the test so that only those OS that are
|
||||
23. The RunGrepTest script used to cut out the test of NUL characters for
|
||||
Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD
|
||||
systems can't either. I've inverted the test so that only those OS that are
|
||||
known to work (currently only Linux) try to run this test.
|
||||
|
||||
24. Some tests in RunGrepTest appended to testtrygrep from two different file
|
||||
descriptors instead of redirecting stderr to stdout. This worked on Linux, but
|
||||
24. Some tests in RunGrepTest appended to testtrygrep from two different file
|
||||
descriptors instead of redirecting stderr to stdout. This worked on Linux, but
|
||||
it was reported not to on other systems, causing the tests to fail.
|
||||
|
||||
25. In the RunTest script, make the test for stack setting use the same value
|
||||
|
@ -105,27 +105,27 @@ for the stack as it needs for -bigstack.
|
|||
|
||||
26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning.
|
||||
|
||||
26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s
|
||||
which are valid in character classes, but not as the end of ranges, were being
|
||||
treated as literals. An example is [_-\s] (but not [\s-_] because that gave an
|
||||
error at the *start* of a range). Now an "invalid range" error is given
|
||||
26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s
|
||||
which are valid in character classes, but not as the end of ranges, were being
|
||||
treated as literals. An example is [_-\s] (but not [\s-_] because that gave an
|
||||
error at the *start* of a range). Now an "invalid range" error is given
|
||||
independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
||||
|
||||
27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape
|
||||
27. Related to 26 above, PCRE2_BAD_ESCAPE_IS_LITERAL was affecting known escape
|
||||
sequences such as \eX when they appeared invalidly in a character class. Now
|
||||
the option applies only to unrecognized or malformed escape sequences.
|
||||
|
||||
28. Fix word boundary in JIT compiler. Patch by Mike Munday.
|
||||
|
||||
29. The pcre2_dfa_match() function was incorrectly handling conditional version
|
||||
tests such as (?(VERSION>=0)...) when the version test was true. Incorrect
|
||||
tests such as (?(VERSION>=0)...) when the version test was true. Incorrect
|
||||
processing or a crash could result.
|
||||
|
||||
30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group
|
||||
30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group
|
||||
names, as Perl does. There was a small bug in this new code, found by
|
||||
ClusterFuzz 12950, fixed before release.
|
||||
|
||||
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
||||
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
||||
construct.
|
||||
|
||||
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
|
||||
|
@ -133,15 +133,15 @@ from auto-anchoring if \p{Any}* starts a pattern.
|
|||
|
||||
33. Compile invalid UTF check in JIT test when only pcre32 is enabled.
|
||||
|
||||
34. For some time now, CMake has been warning about the setting of policy
|
||||
CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be
|
||||
removed in a future version. A request for CMake expertise on the list produced
|
||||
no result, so I have now hacked CMakeLists.txt along the lines of some changes
|
||||
I found on the Internet. The new code no longer needs the policy setting, and
|
||||
it appears to work fine on Linux.
|
||||
34. For some time now, CMake has been warning about the setting of policy
|
||||
CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be
|
||||
removed in a future version. A request for CMake expertise on the list produced
|
||||
no result, so I have now hacked CMakeLists.txt along the lines of some changes
|
||||
I found on the Internet. The new code no longer needs the policy setting, and
|
||||
it appears to work fine on Linux.
|
||||
|
||||
35. Setting --enable-jit=auto for an out-of-tree build failed because the
|
||||
source directory wasn't in the search path for AC_TRY_COMPILE always. Patch
|
||||
source directory wasn't in the search path for AC_TRY_COMPILE always. Patch
|
||||
from Ross Burton.
|
||||
|
||||
|
||||
|
|
2
NEWS
2
NEWS
|
@ -5,7 +5,7 @@ News about PCRE2 releases
|
|||
Version 10.33-RC1 03-March-2019
|
||||
-------------------------------
|
||||
|
||||
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
|
||||
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
|
||||
ChangeLog for the full list):
|
||||
|
||||
1. Callouts from pcre2_substitute() are now available.
|
||||
|
|
|
@ -47,7 +47,7 @@ can skip ahead to the CMake section.
|
|||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators by default.
|
||||
|
||||
|
||||
When you subsequently compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
@ -61,7 +61,7 @@ can skip ahead to the CMake section.
|
|||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
|
||||
Note also that the src/config.h.generic file is created from a config.h
|
||||
that was generated by Autotools, which automatically includes settings of
|
||||
a number of macros that are not actually used by PCRE2 (for example,
|
||||
|
@ -109,7 +109,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_script_run.c
|
||||
pcre2_script_run.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
|
|
24
README
24
README
|
@ -53,7 +53,7 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
|
|||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link (or the program modified, of course). See the
|
||||
renamed or pointed at by a link (or the program modified, of course). See the
|
||||
pcre2posix documentation for more details.
|
||||
|
||||
|
||||
|
@ -311,10 +311,10 @@ library. They are also documented in the pcre2build man page.
|
|||
. There is support for calling external programs during matching in the
|
||||
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
||||
support can be disabled by adding --disable-pcre2grep-callout to the
|
||||
"configure" command. There are two kinds of callout: one that generates
|
||||
output from inbuilt code, and another that calls an external program. The
|
||||
latter has special support for Windows and VMS; otherwise it assumes the
|
||||
existence of the fork() function. This facility can be disabled by adding
|
||||
"configure" command. There are two kinds of callout: one that generates
|
||||
output from inbuilt code, and another that calls an external program. The
|
||||
latter has special support for Windows and VMS; otherwise it assumes the
|
||||
existence of the fork() function. This facility can be disabled by adding
|
||||
--disable-pcre2grep-callout-fork to the "configure" command.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
|
@ -344,7 +344,7 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||
whichever is the larger.
|
||||
|
||||
|
||||
. It is possible to compile pcre2test so that it links with the libreadline
|
||||
or libedit libraries, by specifying, respectively,
|
||||
|
||||
|
@ -367,14 +367,14 @@ library. They are also documented in the pcre2build man page.
|
|||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
|
||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||
However, there is at least one environment that claims to be C99 but does not
|
||||
support these modifiers. If --disable-percent-zt is specified, no use is made
|
||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||
size_t values.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
|
@ -790,7 +790,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
|
|
|
@ -753,7 +753,7 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
|
|||
else
|
||||
$cf $srcdir/testdata/grepoutputC testtrygrep
|
||||
fi
|
||||
|
||||
|
||||
if [ $? != 0 ] ; then exit 1; fi
|
||||
else
|
||||
echo "Script callouts are not supported"
|
||||
|
|
14
configure.ac
14
configure.ac
|
@ -147,14 +147,14 @@ AC_ARG_ENABLE(jit,
|
|||
if test "$enable_jit" = "auto"; then
|
||||
AC_LANG(C)
|
||||
SAVE_CPPFLAGS=$CPPFLAGS
|
||||
CPPFLAGS=-I$srcdir
|
||||
CPPFLAGS=-I$srcdir
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#define SLJIT_CONFIG_AUTO 1
|
||||
#include "src/sljit/sljitConfigInternal.h"
|
||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||
#error unsupported
|
||||
#endif]])], enable_jit=yes, enable_jit=no)
|
||||
CPPFLAGS=$SAVE_CPPFLAGS
|
||||
CPPFLAGS=$SAVE_CPPFLAGS
|
||||
echo checking for JIT support on this hardware... $enable_jit
|
||||
fi
|
||||
|
||||
|
@ -607,7 +607,7 @@ if test "$enable_percent_zt" = "no"; then
|
|||
Define to any value to disable the use of the z and t modifiers in
|
||||
formatting settings such as %zu or %td (this is rarely needed).])
|
||||
else
|
||||
enable_percent_zt=auto
|
||||
enable_percent_zt=auto
|
||||
fi
|
||||
|
||||
# Unless running under Windows, JIT support requires pthreads.
|
||||
|
@ -647,13 +647,13 @@ if test "$enable_pcre2grep_callout" = "yes"; then
|
|||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
||||
Define to any value to enable fork support in pcre2grep callout scripts.
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||
defined.])
|
||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||
defined.])
|
||||
fi
|
||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||
Define to any value to enable callout script support in pcre2grep.])
|
||||
else
|
||||
enable_pcre2grep_callout_fork="no"
|
||||
enable_pcre2grep_callout_fork="no"
|
||||
fi
|
||||
|
||||
if test "$enable_unicode" = "yes"; then
|
||||
|
@ -1055,7 +1055,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Build static libs .................. : ${enable_static}
|
||||
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||
|
|
|
@ -47,7 +47,7 @@ can skip ahead to the CMake section.
|
|||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators by default.
|
||||
|
||||
|
||||
When you subsequently compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
sources.
|
||||
|
@ -61,7 +61,7 @@ can skip ahead to the CMake section.
|
|||
configure/make world, this is handled automatically.) When upgrading to a
|
||||
new release, you are strongly advised to review src/config.h.generic
|
||||
before re-using what you had previously.
|
||||
|
||||
|
||||
Note also that the src/config.h.generic file is created from a config.h
|
||||
that was generated by Autotools, which automatically includes settings of
|
||||
a number of macros that are not actually used by PCRE2 (for example,
|
||||
|
@ -109,7 +109,7 @@ can skip ahead to the CMake section.
|
|||
pcre2_newline.c
|
||||
pcre2_ord2utf.c
|
||||
pcre2_pattern_info.c
|
||||
pcre2_script_run.c
|
||||
pcre2_script_run.c
|
||||
pcre2_serialize.c
|
||||
pcre2_string_utils.c
|
||||
pcre2_study.c
|
||||
|
|
|
@ -53,7 +53,7 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
|
|||
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||
renamed or pointed at by a link (or the program modified, of course). See the
|
||||
renamed or pointed at by a link (or the program modified, of course). See the
|
||||
pcre2posix documentation for more details.
|
||||
|
||||
|
||||
|
@ -311,7 +311,11 @@ library. They are also documented in the pcre2build man page.
|
|||
. There is support for calling external programs during matching in the
|
||||
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
||||
support can be disabled by adding --disable-pcre2grep-callout to the
|
||||
"configure" command.
|
||||
"configure" command. There are two kinds of callout: one that generates
|
||||
output from inbuilt code, and another that calls an external program. The
|
||||
latter has special support for Windows and VMS; otherwise it assumes the
|
||||
existence of the fork() function. This facility can be disabled by adding
|
||||
--disable-pcre2grep-callout-fork to the "configure" command.
|
||||
|
||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||
|
@ -363,14 +367,14 @@ library. They are also documented in the pcre2build man page.
|
|||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||
should fix it.
|
||||
|
||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
|
||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||
However, there is at least one environment that claims to be C99 but does not
|
||||
support these modifiers. If --disable-percent-zt is specified, no use is made
|
||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||
size_t values.
|
||||
|
||||
. There is a special option called --enable-fuzz-support for use by people who
|
||||
|
@ -786,7 +790,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_newline.c )
|
||||
src/pcre2_ord2utf.c )
|
||||
src/pcre2_pattern_info.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_script_run.c )
|
||||
src/pcre2_serialize.c )
|
||||
src/pcre2_string_utils.c )
|
||||
src/pcre2_study.c )
|
||||
|
@ -886,4 +890,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 29 January 2019
|
||||
Last updated: 03 March 2019
|
||||
|
|
|
@ -52,7 +52,7 @@ characters. The options are:
|
|||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
On success, make a private subject copy
|
||||
On success, make a private subject copy
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject is not the end of a line
|
||||
|
|
|
@ -61,7 +61,7 @@ terminated by a binary zero code unit. The options are:
|
|||
<pre>
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
On success, make a private subject copy
|
||||
On success, make a private subject copy
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject string is not the end of a line
|
||||
|
|
|
@ -31,7 +31,7 @@ using the memory freeing function from the general context or compiled pattern
|
|||
with which it was created, or <b>free()</b> if that was not set.
|
||||
</P>
|
||||
<P>
|
||||
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
||||
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
||||
match data block, the copy of the subject that was remembered with the block is
|
||||
also freed.
|
||||
</P>
|
||||
|
|
|
@ -31,7 +31,7 @@ housed in a compile context. It completely replaces all the bits. The extra
|
|||
options are:
|
||||
<pre>
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
|
||||
PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
|
||||
PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
|
||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character
|
||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
|
||||
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
|
||||
|
|
|
@ -1309,7 +1309,7 @@ be referenced by the substring extraction functions after a successful match.
|
|||
After running a match, you must not free a compiled pattern or a subject string
|
||||
until after all operations on the
|
||||
<a href="#matchdatablock">match data block</a>
|
||||
have taken place, unless, in the case of the subject string, you have used the
|
||||
have taken place, unless, in the case of the subject string, you have used the
|
||||
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
||||
"Option bits for <b>pcre2_match()</b>"
|
||||
<a href="#matchoptions">below.</a>
|
||||
|
@ -1437,8 +1437,8 @@ binary zero character followed by z).
|
|||
ECMAscript 6 added additional functionality to \u. This can be accessed using
|
||||
the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
|
||||
<a href="#extracompileoptions">below).</a>
|
||||
Note that this alternative escape handling applies only to patterns. Neither of
|
||||
these options affects the processing of replacement strings passed to
|
||||
Note that this alternative escape handling applies only to patterns. Neither of
|
||||
these options affects the processing of replacement strings passed to
|
||||
<b>pcre2_substitute()</b>.
|
||||
<pre>
|
||||
PCRE2_ALT_CIRCUMFLEX
|
||||
|
@ -1875,10 +1875,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
|||
<pre>
|
||||
PCRE2_EXTRA_ALT_BSUX
|
||||
</pre>
|
||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in
|
||||
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
||||
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||
PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
|
||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in
|
||||
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
||||
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||
PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
|
||||
character code, where hhh.. is any number of hexadecimal digits.
|
||||
<pre>
|
||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
||||
|
@ -1896,7 +1896,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
|
|||
<b>pcre2_compile()</b>, all unrecognized or malformed escape sequences are
|
||||
treated as single-character escapes. For example, \j is a literal "j" and
|
||||
\x{2z} is treated as the literal string "x{2z}". Setting this option means
|
||||
that typos in patterns may go undetected and have unexpected results. Also note
|
||||
that typos in patterns may go undetected and have unexpected results. Also note
|
||||
that a sequence such as [\N{] is interpreted as a malformed attempt at
|
||||
[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an
|
||||
unqualified \N is a valid escape sequence but is not supported in a character
|
||||
|
@ -1904,9 +1904,9 @@ class. To reiterate: this is a dangerous option. Use with great care.
|
|||
<pre>
|
||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
</pre>
|
||||
There are some legacy applications where the escape sequence \r in a pattern
|
||||
is expected to match a newline. If this option is set, \r in a pattern is
|
||||
converted to \n so that it matches a LF (linefeed) instead of a CR (carriage
|
||||
There are some legacy applications where the escape sequence \r in a pattern
|
||||
is expected to match a newline. If this option is set, \r in a pattern is
|
||||
converted to \n so that it matches a LF (linefeed) instead of a CR (carriage
|
||||
return) character. The option does not affect a literal CR in the pattern, nor
|
||||
does it affect CR specified as an explicit code point such as \x{0D}.
|
||||
<pre>
|
||||
|
@ -2564,7 +2564,7 @@ Option bits for <b>pcre2_match()</b>
|
|||
</b><br>
|
||||
<P>
|
||||
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
||||
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
||||
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
||||
|
@ -2585,8 +2585,8 @@ matching.
|
|||
<pre>
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
</pre>
|
||||
By default, a pointer to the subject is remembered in the match data block so
|
||||
that, after a successful match, it can be referenced by the substring
|
||||
By default, a pointer to the subject is remembered in the match data block so
|
||||
that, after a successful match, it can be referenced by the substring
|
||||
extraction functions. This means that the subject's memory must not be freed
|
||||
until all such operations are complete. For some applications where the
|
||||
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
||||
|
@ -2866,8 +2866,8 @@ undefined.
|
|||
<P>
|
||||
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
||||
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
||||
<b>pcre2_get_mark()</b> can be called to access this name, which can be
|
||||
specified in the pattern by any of the backtracking control verbs, not just
|
||||
<b>pcre2_get_mark()</b> can be called to access this name, which can be
|
||||
specified in the pattern by any of the backtracking control verbs, not just
|
||||
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
||||
the zero-terminated name, which is within the compiled pattern. If no name is
|
||||
available, NULL is returned. The length of the name (excluding the terminating
|
||||
|
@ -3002,7 +3002,7 @@ The backtracking match limit was reached.
|
|||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
||||
<pre>
|
||||
PCRE2_ERROR_NULL
|
||||
|
@ -3405,7 +3405,7 @@ capture groups and letters within \Q...\E quoted sequences.
|
|||
<P>
|
||||
Note that case forcing sequences such as \U...\E do not nest. For example,
|
||||
the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
|
||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||
not apply to replacement strings.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -3439,7 +3439,7 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
|
|||
groups in the extended syntax forms to be treated as unset.
|
||||
</P>
|
||||
<P>
|
||||
If successful, <b>pcre2_substitute()</b> returns the number of successful
|
||||
If successful, <b>pcre2_substitute()</b> returns the number of successful
|
||||
matches. This may be zero if no matches were found, and is never greater than 1
|
||||
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||
</P>
|
||||
|
@ -3489,8 +3489,8 @@ Substitution callouts
|
|||
<br>
|
||||
The <b>pcre2_set_substitution_callout()</b> function can be used to specify a
|
||||
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
||||
a match context. The callout function is called after each substitution has
|
||||
been processed, but it can cause the replacement not to happen. The callout
|
||||
a match context. The callout function is called after each substitution has
|
||||
been processed, but it can cause the replacement not to happen. The callout
|
||||
function is not called for simulated substitutions that happen as a result of
|
||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||
</P>
|
||||
|
@ -3500,10 +3500,10 @@ block structure, which contains the following fields, not necessarily in this
|
|||
order:
|
||||
<pre>
|
||||
uint32_t <i>version</i>;
|
||||
uint32_t <i>subscount</i>;
|
||||
uint32_t <i>subscount</i>;
|
||||
PCRE2_SPTR <i>input</i>;
|
||||
PCRE2_SPTR <i>output</i>;
|
||||
PCRE2_SIZE <i>*ovector</i>;
|
||||
PCRE2_SPTR <i>output</i>;
|
||||
PCRE2_SIZE <i>*ovector</i>;
|
||||
uint32_t <i>oveccount</i>;
|
||||
PCRE2_SIZE <i>output_offsets[2]</i>;
|
||||
</pre>
|
||||
|
@ -3517,9 +3517,9 @@ first callout, 2 for the second, and so on. The <i>input</i> and <i>output</i>
|
|||
pointers are copies of the values passed to <b>pcre2_substitute()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The <i>ovector</i> field points to the ovector, which contains the result of the
|
||||
most recent match. The <i>oveccount</i> field contains the number of pairs that
|
||||
are set in the ovector, and is always greater than zero.
|
||||
The <i>ovector</i> field points to the ovector, which contains the result of the
|
||||
most recent match. The <i>oveccount</i> field contains the number of pairs that
|
||||
are set in the ovector, and is always greater than zero.
|
||||
</P>
|
||||
<P>
|
||||
The <i>output_offsets</i> vector contains the offsets of the replacement in the
|
||||
|
|
|
@ -376,12 +376,15 @@ environment.
|
|||
</P>
|
||||
<br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
|
||||
<P>
|
||||
By default, on non-Windows systems, <b>pcre2grep</b> supports the use of
|
||||
callouts with string arguments within the patterns it is matching, in order to
|
||||
run external scripts. For details, see the
|
||||
By default <b>pcre2grep</b> supports the use of callouts with string arguments
|
||||
within the patterns it is matching. There are two kinds: one that generates
|
||||
output using local code, and another that calls an external program or script.
|
||||
If --disable-pcre2grep-callout-fork is added to the <b>configure</b> command,
|
||||
only the first kind of callout is supported; if --disable-pcre2grep-callout is
|
||||
used, all callouts are completely ignored. For more details of <b>pcre2grep</b>
|
||||
callouts, see the
|
||||
<a href="pcre2grep.html"><b>pcre2grep</b></a>
|
||||
documentation. This support can be disabled by adding
|
||||
--disable-pcre2grep-callout to the <b>configure</b> command.
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||
<P>
|
||||
|
@ -526,14 +529,14 @@ documentation.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
|
||||
<P>
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||
However, there is at least one environment that claims to be C99 but does not
|
||||
support these modifiers. If
|
||||
support these modifiers. If
|
||||
<pre>
|
||||
--disable-percent-zt
|
||||
--disable-percent-zt
|
||||
</pre>
|
||||
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||
%lu is used, with a cast for size_t values.
|
||||
|
@ -589,9 +592,9 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 15 November 2018
|
||||
Last updated: 03 March 2019
|
||||
<br>
|
||||
Copyright © 1997-2018 University of Cambridge.
|
||||
Copyright © 1997-2019 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -48,7 +48,7 @@ When using the <b>pcre2_substitute()</b> function, an additional callout feature
|
|||
is available. This does a callout after each change to the subject string and
|
||||
is described in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
pattern matching.
|
||||
</P>
|
||||
<P>
|
||||
|
|
|
@ -871,8 +871,8 @@ only callouts with string arguments are useful.
|
|||
Calling external programs or scripts
|
||||
</b><br>
|
||||
<P>
|
||||
This facility can be independently disabled when <b>pcre2grep</b> is built. It
|
||||
is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
|
||||
This facility can be independently disabled when <b>pcre2grep</b> is built. It
|
||||
is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
|
||||
where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
|
||||
<b>fork()</b> and <b>execv()</b> are available.
|
||||
</P>
|
||||
|
|
|
@ -418,13 +418,13 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed
|
|||
by { is not recognized. Only if \x is followed by two hexadecimal digits is it
|
||||
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
||||
character. In this mode, support for code points greater than 256 is provided
|
||||
by \u, which must be followed by four hexadecimal digits; otherwise it is
|
||||
by \u, which must be followed by four hexadecimal digits; otherwise it is
|
||||
interpreted as a literal "u" character.
|
||||
</P>
|
||||
<P>
|
||||
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
||||
\u{hhh..} is recognized as the character specified by hexadecimal code point.
|
||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||
6.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1194,7 +1194,7 @@ character. If any other of these assertions appears in a character class, an
|
|||
A word boundary is a position in the subject string where the current character
|
||||
and the previous character do not both match \w or \W (i.e. one matches
|
||||
\w and the other matches \W), or the start or end of the string if the
|
||||
first or last character matches \w, respectively. When PCRE2 is built with
|
||||
first or last character matches \w, respectively. When PCRE2 is built with
|
||||
Unicode support, the meanings of \w and \W can be changed by setting the
|
||||
PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2
|
||||
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
||||
|
|
|
@ -50,13 +50,13 @@ expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
|
|||
and 32-bit libraries. See the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation for a description of PCRE2's native API, which contains much
|
||||
additional functionality.
|
||||
additional functionality.
|
||||
</P>
|
||||
<P>
|
||||
The functions described here are wrapper functions that ultimately call the
|
||||
PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
|
||||
file, and they all have unique names starting with <b>pcre2_</b>. However, the
|
||||
<b>pcre2posix.h</b> header also contains macro definitions that convert the
|
||||
<b>pcre2posix.h</b> header also contains macro definitions that convert the
|
||||
standard POSIX names such as <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
|
||||
means that a program can use the usual POSIX names without running the risk of
|
||||
accidentally linking with POSIX functions from a different library.
|
||||
|
@ -68,7 +68,7 @@ application. Because the POSIX functions call the native ones, it is also
|
|||
necessary to add <b>-lpcre2-8</b>.
|
||||
</P>
|
||||
<P>
|
||||
Although they are not defined as prototypes in <b>pcre2posix.h</b>, the library
|
||||
Although they are not defined as prototypes in <b>pcre2posix.h</b>, the library
|
||||
does contain functions with the POSIX names <b>regcomp()</b> etc. These simply
|
||||
pass their arguments to the PCRE2 functions. These functions are provided for
|
||||
backwards compatibility with earlier versions of PCRE2, so that existing
|
||||
|
|
|
@ -58,7 +58,7 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||
</P>
|
||||
<br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
|
||||
<P>
|
||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||
sequence causes an error.
|
||||
<pre>
|
||||
\a alarm, that is, the BEL character (hex 07)
|
||||
|
@ -85,7 +85,7 @@ following are also recognized:
|
|||
When \x is not followed by {, from zero to two hexadecimal digits are read,
|
||||
but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
|
||||
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
||||
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
||||
matches a literal "u".
|
||||
</P>
|
||||
|
|
|
@ -606,10 +606,10 @@ for a description of the effects of these options.
|
|||
/s dotall set PCRE2_DOTALL
|
||||
dupnames set PCRE2_DUPNAMES
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
/x extended set PCRE2_EXTENDED
|
||||
/xx extended_more set PCRE2_EXTENDED_MORE
|
||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||
firstline set PCRE2_FIRSTLINE
|
||||
literal set PCRE2_LITERAL
|
||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||
|
@ -1043,7 +1043,7 @@ process.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text
|
||||
altglobal alternative global matching
|
||||
/g global global matching
|
||||
|
@ -1051,9 +1051,9 @@ process.
|
|||
mark show mark values
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1191,7 +1191,7 @@ pattern.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
|
@ -1221,9 +1221,9 @@ pattern.
|
|||
replace=<string> specify a replacement string
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1306,9 +1306,9 @@ result, and also for DFA matching, provides a means of checking that there are
|
|||
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||
ovector is filled with a special value, and if this is found in both elements
|
||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
ovector that is used depends on the number of matches that were found.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -1320,7 +1320,7 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
|
|||
controlled by various modifiers listed above whose names begin with
|
||||
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
||||
<a href="#callouts">below.</a>
|
||||
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
||||
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
||||
"Testing the substitution function"
|
||||
<a href="#substitution">below.</a>
|
||||
</P>
|
||||
|
@ -1449,14 +1449,14 @@ matching provokes an error return ("bad option value") from
|
|||
Testing substitute callouts
|
||||
</b><br>
|
||||
<P>
|
||||
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
||||
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
||||
function is set up. When it is called (after each substitution), details of the
|
||||
input and output strings are output. For example:
|
||||
<pre>
|
||||
/abc/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||
2: <abc>def<abc>pqr
|
||||
</pre>
|
||||
The first number on each callout line is the count of matches. The
|
||||
|
@ -1466,11 +1466,11 @@ listed the offsets of the old substring, its contents, and the same for the
|
|||
replacement.
|
||||
</P>
|
||||
<P>
|
||||
By default, the substitution callout function returns zero, which accepts the
|
||||
replacement and causes matching to continue if /g was used. Two further
|
||||
modifiers can be used to test other return values. If <b>substitute_skip</b> is
|
||||
set to a value greater than zero the callout function returns +1 for the match
|
||||
of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
|
||||
By default, the substitution callout function returns zero, which accepts the
|
||||
replacement and causes matching to continue if /g was used. Two further
|
||||
modifiers can be used to test other return values. If <b>substitute_skip</b> is
|
||||
set to a value greater than zero the callout function returns +1 for the match
|
||||
of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
|
||||
replacement to be rejected, and -1 causes no further matching to take place. If
|
||||
either of them is set, <b>substitute_callout</b> is assumed. For example:
|
||||
<pre>
|
||||
|
@ -1483,7 +1483,7 @@ either of them are set, <b>substitute_callout</b> is assumed. For example:
|
|||
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
||||
1: abcdefabcpqr
|
||||
</pre>
|
||||
If both are set for the same number, stop takes precedence. Only a single skip
|
||||
If both are set for the same number, stop takes precedence. Only a single skip
|
||||
or stop is supported, which is sufficient for testing that the feature works.
|
||||
</P>
|
||||
<br><b>
|
||||
|
|
|
@ -82,7 +82,7 @@ The escape sequence \C can be used to match a single code unit in a UTF mode,
|
|||
but its use can lead to some strange effects because it breaks up multi-unit
|
||||
characters (see the description of \C in the
|
||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||
documentation). For this reason, there is a build-time option that disables
|
||||
documentation). For this reason, there is a build-time option that disables
|
||||
support for \C completely. There is also a less draconian compile-time option
|
||||
for locking out the use of \C when a pattern is compiled.
|
||||
</P>
|
||||
|
@ -144,14 +144,14 @@ scripts are commonly used together, and because some diacritical and other
|
|||
marks are used with multiple scripts, it is not that simple.
|
||||
</P>
|
||||
<P>
|
||||
Every Unicode character has a Script property, mostly with a value
|
||||
Every Unicode character has a Script property, mostly with a value
|
||||
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
||||
are also three special values:
|
||||
</P>
|
||||
<P>
|
||||
"Unknown" is used for code points that have not been assigned, and also for the
|
||||
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||
only in non-UTF mode, are assigned the Unknown script.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -165,20 +165,20 @@ previous character. These are considered to take on the script of the character
|
|||
that they modify.
|
||||
</P>
|
||||
<P>
|
||||
Some Inherited characters are used with many scripts, but many of them are only
|
||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||
possible to check this, a Unicode property called Script Extension exists. Its
|
||||
value is a list of scripts that apply to the character. For the majority of
|
||||
Some Inherited characters are used with many scripts, but many of them are only
|
||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||
possible to check this, a Unicode property called Script Extension exists. Its
|
||||
value is a list of scripts that apply to the character. For the majority of
|
||||
characters, the list contains just one script, the same one as the Script
|
||||
property. However, for characters such as U+102E0 more than one Script is
|
||||
listed. There are also some Common characters that have a single, non-Common
|
||||
script in their Script Extension list.
|
||||
</P>
|
||||
<P>
|
||||
The next section describes the basic rules for deciding whether a given string
|
||||
of characters is a script run. Note, however, that there are some special cases
|
||||
involving the Chinese Han script, and an additional constraint for decimal
|
||||
The next section describes the basic rules for deciding whether a given string
|
||||
of characters is a script run. Note, however, that there are some special cases
|
||||
involving the Chinese Han script, and an additional constraint for decimal
|
||||
digits. These are covered in subsequent sections.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -201,17 +201,17 @@ all the sets of scripts must not be empty.
|
|||
<P>
|
||||
A simple example is an Internet name such as "google.com". The letters are all
|
||||
in the Latin script, and the dot is Common, so this string is a script run.
|
||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||
string that looks the same, but with Cyrillic "o"s is not a script run.
|
||||
</P>
|
||||
<P>
|
||||
More interesting examples involve characters with more than one script in their
|
||||
More interesting examples involve characters with more than one script in their
|
||||
Script Extension. Consider the following characters:
|
||||
<pre>
|
||||
U+060C Arabic comma
|
||||
U+06D4 Arabic full stop
|
||||
</pre>
|
||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
||||
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
||||
appear in Syriac or Thaana script runs, but the second could not.
|
||||
|
@ -220,8 +220,8 @@ appear in Syriac or Thaana script runs, but the second could not.
|
|||
The Chinese Han script
|
||||
</b><br>
|
||||
<P>
|
||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
||||
and Han. These three combinations are treated as special cases when checking
|
||||
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
||||
|
|
183
doc/pcre2.txt
183
doc/pcre2.txt
|
@ -180,8 +180,8 @@ REVISION
|
|||
Last updated: 17 September 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||
|
||||
|
||||
|
@ -3681,8 +3681,8 @@ REVISION
|
|||
Last updated: 14 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||
|
||||
|
||||
|
@ -4027,45 +4027,48 @@ USING EBCDIC CODE
|
|||
|
||||
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
|
||||
|
||||
By default, on non-Windows systems, pcre2grep supports the use of call-
|
||||
outs with string arguments within the patterns it is matching, in order
|
||||
to run external scripts. For details, see the pcre2grep documentation.
|
||||
This support can be disabled by adding --disable-pcre2grep-callout to
|
||||
the configure command.
|
||||
By default pcre2grep supports the use of callouts with string arguments
|
||||
within the patterns it is matching. There are two kinds: one that gen-
|
||||
erates output using local code, and another that calls an external pro-
|
||||
gram or script. If --disable-pcre2grep-callout-fork is added to the
|
||||
configure command, only the first kind of callout is supported; if
|
||||
--disable-pcre2grep-callout is used, all callouts are completely
|
||||
ignored. For more details of pcre2grep callouts, see the pcre2grep doc-
|
||||
umentation.
|
||||
|
||||
|
||||
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
|
||||
|
||||
By default, pcre2grep reads all files as plain text. You can build it
|
||||
so that it recognizes files whose names end in .gz or .bz2, and reads
|
||||
By default, pcre2grep reads all files as plain text. You can build it
|
||||
so that it recognizes files whose names end in .gz or .bz2, and reads
|
||||
them with libz or libbz2, respectively, by adding one or both of
|
||||
|
||||
--enable-pcre2grep-libz
|
||||
--enable-pcre2grep-libbz2
|
||||
|
||||
to the configure command. These options naturally require that the rel-
|
||||
evant libraries are installed on your system. Configuration will fail
|
||||
evant libraries are installed on your system. Configuration will fail
|
||||
if they are not.
|
||||
|
||||
|
||||
PCRE2GREP BUFFER SIZE
|
||||
|
||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||
scanning, in order to be able to output "before" and "after" lines when
|
||||
it finds a match. The default starting size of the buffer is 20KiB. The
|
||||
buffer itself is three times this size, but because of the way it is
|
||||
buffer itself is three times this size, but because of the way it is
|
||||
used for holding "before" lines, the longest line that is guaranteed to
|
||||
be processable is the notional buffer size. If a longer line is encoun-
|
||||
tered, pcre2grep automatically expands the buffer, up to a specified
|
||||
maximum size, whose default is 1MiB or the starting size, whichever is
|
||||
the larger. You can change the default parameter values by adding, for
|
||||
tered, pcre2grep automatically expands the buffer, up to a specified
|
||||
maximum size, whose default is 1MiB or the starting size, whichever is
|
||||
the larger. You can change the default parameter values by adding, for
|
||||
example,
|
||||
|
||||
--with-pcre2grep-bufsize=51200
|
||||
--with-pcre2grep-max-bufsize=2097152
|
||||
|
||||
to the configure command. The caller of pcre2grep can override these
|
||||
values by using --buffer-size and --max-buffer-size on the command
|
||||
to the configure command. The caller of pcre2grep can override these
|
||||
values by using --buffer-size and --max-buffer-size on the command
|
||||
line.
|
||||
|
||||
|
||||
|
@ -4076,26 +4079,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
|
|||
--enable-pcre2test-libreadline
|
||||
--enable-pcre2test-libedit
|
||||
|
||||
to the configure command, pcre2test is linked with the libreadline
|
||||
to the configure command, pcre2test is linked with the libreadline
|
||||
or libedit library, respectively, and when its input is from a terminal,
|
||||
it reads it using the readline() function. This provides line-editing
|
||||
and history facilities. Note that libreadline is GPL-licensed, so if
|
||||
you distribute a binary of pcre2test linked in this way, there may be
|
||||
it reads it using the readline() function. This provides line-editing
|
||||
and history facilities. Note that libreadline is GPL-licensed, so if
|
||||
you distribute a binary of pcre2test linked in this way, there may be
|
||||
licensing issues. These can be avoided by linking instead with libedit,
|
||||
which has a BSD licence.
|
||||
|
||||
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
||||
be added to the pcre2test build. In many operating environments with a
|
||||
system-installed readline library this is sufficient. However, in some
|
||||
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
||||
be added to the pcre2test build. In many operating environments with a
|
||||
system-installed readline library this is sufficient. However, in some
|
||||
environments (e.g. if an unmodified distribution version of readline is
|
||||
in use), some extra configuration may be necessary. The INSTALL file
|
||||
in use), some extra configuration may be necessary. The INSTALL file
|
||||
for libreadline says this:
|
||||
|
||||
"Readline uses the termcap functions, but does not link with
|
||||
the termcap or curses library itself, allowing applications
|
||||
which link with readline the to choose an appropriate library."
|
||||
|
||||
If your environment has not been set up so that an appropriate library
|
||||
If your environment has not been set up so that an appropriate library
|
||||
is automatically included, you may need to add something like
|
||||
|
||||
LIBS="-lncurses"
|
||||
|
@ -4109,7 +4112,7 @@ INCLUDING DEBUGGING CODE
|
|||
|
||||
--enable-debug
|
||||
|
||||
to the configure command, additional debugging code is included in the
|
||||
to the configure command, additional debugging code is included in the
|
||||
build. This feature is intended for use by the PCRE2 maintainers.
|
||||
|
||||
|
||||
|
@ -4119,15 +4122,15 @@ DEBUGGING WITH VALGRIND SUPPORT
|
|||
|
||||
--enable-valgrind
|
||||
|
||||
to the configure command, PCRE2 will use valgrind annotations to mark
|
||||
certain memory regions as unaddressable. This allows it to detect
|
||||
invalid memory accesses, and is mostly useful for debugging PCRE2
|
||||
to the configure command, PCRE2 will use valgrind annotations to mark
|
||||
certain memory regions as unaddressable. This allows it to detect
|
||||
invalid memory accesses, and is mostly useful for debugging PCRE2
|
||||
itself.
|
||||
|
||||
|
||||
CODE COVERAGE REPORTING
|
||||
|
||||
If your C compiler is gcc, you can build a version of PCRE2 that can
|
||||
If your C compiler is gcc, you can build a version of PCRE2 that can
|
||||
generate a code coverage report for its test suite. To enable this, you
|
||||
must install lcov version 1.6 or above. Then specify
|
||||
|
||||
|
@ -4136,20 +4139,20 @@ CODE COVERAGE REPORTING
|
|||
to the configure command and build PCRE2 in the usual way.
|
||||
|
||||
Note that using ccache (a caching C compiler) is incompatible with code
|
||||
coverage reporting. If you have configured ccache to run automatically
|
||||
coverage reporting. If you have configured ccache to run automatically
|
||||
on your system, you must set the environment variable
|
||||
|
||||
CCACHE_DISABLE=1
|
||||
|
||||
before running make to build PCRE2, so that ccache is not used.
|
||||
|
||||
When --enable-coverage is used, the following additional targets are
|
||||
When --enable-coverage is used, the following additional targets are
|
||||
added to the Makefile:
|
||||
|
||||
make coverage
|
||||
|
||||
This creates a fresh coverage report for the PCRE2 test suite. It is
|
||||
equivalent to running "make coverage-reset", "make coverage-baseline",
|
||||
This creates a fresh coverage report for the PCRE2 test suite. It is
|
||||
equivalent to running "make coverage-reset", "make coverage-baseline",
|
||||
"make check", and then "make coverage-report".
|
||||
|
||||
make coverage-reset
|
||||
|
@ -4166,28 +4169,28 @@ CODE COVERAGE REPORTING
|
|||
|
||||
make coverage-clean-report
|
||||
|
||||
This removes the generated coverage report without cleaning the cover-
|
||||
This removes the generated coverage report without cleaning the cover-
|
||||
age data itself.
|
||||
|
||||
make coverage-clean-data
|
||||
|
||||
This removes the captured coverage data without removing the coverage
|
||||
This removes the captured coverage data without removing the coverage
|
||||
files created at compile time (*.gcno).
|
||||
|
||||
make coverage-clean
|
||||
|
||||
This cleans all coverage data including the generated coverage report.
|
||||
For more information about code coverage, see the gcov and lcov docu-
|
||||
This cleans all coverage data including the generated coverage report.
|
||||
For more information about code coverage, see the gcov and lcov docu-
|
||||
mentation.
|
||||
|
||||
|
||||
DISABLING THE Z AND T FORMATTING MODIFIERS
|
||||
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
||||
in environments other than Microsoft Visual Studio when __STDC_VER-
|
||||
SION__ is defined and has a value greater than or equal to 199901L
|
||||
(indicating C99). However, there is at least one environment that
|
||||
The C99 standard defines formatting modifiers z and t for size_t and
|
||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
||||
in environments other than Microsoft Visual Studio when __STDC_VER-
|
||||
SION__ is defined and has a value greater than or equal to 199901L
|
||||
(indicating C99). However, there is at least one environment that
|
||||
claims to be C99 but does not support these modifiers. If
|
||||
|
||||
--disable-percent-zt
|
||||
|
@ -4198,39 +4201,39 @@ DISABLING THE Z AND T FORMATTING MODIFIERS
|
|||
|
||||
SUPPORT FOR FUZZERS
|
||||
|
||||
There is a special option for use by people who want to run fuzzing
|
||||
There is a special option for use by people who want to run fuzzing
|
||||
tests on PCRE2:
|
||||
|
||||
--enable-fuzz-support
|
||||
|
||||
At present this applies only to the 8-bit library. If set, it causes an
|
||||
extra library called libpcre2-fuzzsupport.a to be built, but not
|
||||
installed. This contains a single function called LLVMFuzzerTestOneIn-
|
||||
put() whose arguments are a pointer to a string and the length of the
|
||||
string. When called, this function tries to compile the string as a
|
||||
pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the
|
||||
extra library called libpcre2-fuzzsupport.a to be built, but not
|
||||
installed. This contains a single function called LLVMFuzzerTestOneIn-
|
||||
put() whose arguments are a pointer to a string and the length of the
|
||||
string. When called, this function tries to compile the string as a
|
||||
pattern, and if that succeeds, to match it. This is done both with no
|
||||
options and with some random options bits that are generated from the
|
||||
string.
|
||||
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
||||
zcheck to be created. This is normally run under valgrind or used when
|
||||
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
||||
zcheck to be created. This is normally run under valgrind or used when
|
||||
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
|
||||
function and outputs information about what it is doing. The input
|
||||
strings are specified by arguments: if an argument starts with "=" the
|
||||
rest of it is a literal input string. Otherwise, it is assumed to be a
|
||||
function and outputs information about what it is doing. The input
|
||||
strings are specified by arguments: if an argument starts with "=" the
|
||||
rest of it is a literal input string. Otherwise, it is assumed to be a
|
||||
file name, and the contents of the file are the test string.
|
||||
|
||||
|
||||
OBSOLETE OPTION
|
||||
|
||||
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
||||
backtracking in the pcre2_match() function. The default was to use the
|
||||
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
||||
backtracking in the pcre2_match() function. The default was to use the
|
||||
system stack, but if
|
||||
|
||||
--disable-stack-for-recursion
|
||||
|
||||
was set, memory on the heap was used. From release 10.30 onwards this
|
||||
has changed (the stack is no longer used) and this option now does
|
||||
was set, memory on the heap was used. From release 10.30 onwards this
|
||||
has changed (the stack is no longer used) and this option now does
|
||||
nothing except give a warning.
|
||||
|
||||
|
||||
|
@ -4248,11 +4251,11 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 15 November 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
Last updated: 03 March 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||
|
||||
|
||||
|
@ -4682,8 +4685,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||
|
||||
|
||||
|
@ -4887,8 +4890,8 @@ REVISION
|
|||
Last updated: 12 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||
|
||||
|
||||
|
@ -5287,8 +5290,8 @@ REVISION
|
|||
Last updated: 16 October 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||
|
||||
|
||||
|
@ -5357,8 +5360,8 @@ REVISION
|
|||
Last updated: 02 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||
|
||||
|
||||
|
@ -5578,8 +5581,8 @@ REVISION
|
|||
Last updated: 10 October 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||
|
||||
|
||||
|
@ -6018,8 +6021,8 @@ REVISION
|
|||
Last updated: 22 December 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||
|
||||
|
||||
|
@ -9362,8 +9365,8 @@ REVISION
|
|||
Last updated: 12 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||
|
||||
|
||||
|
@ -9597,8 +9600,8 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||
|
||||
|
||||
|
@ -9927,8 +9930,8 @@ REVISION
|
|||
Last updated: 30 January 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||
|
||||
|
||||
|
@ -10206,8 +10209,8 @@ REVISION
|
|||
Last updated: 27 June 2018
|
||||
Copyright (c) 1997-2018 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||
|
||||
|
||||
|
@ -10707,8 +10710,8 @@ REVISION
|
|||
Last updated: 11 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||
|
||||
|
||||
|
@ -11079,5 +11082,5 @@ REVISION
|
|||
Last updated: 03 February 2019
|
||||
Copyright (c) 1997-2019 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -75,7 +75,7 @@ PCRE2_UTF, PCRE2_UCP and related options.
|
|||
.P
|
||||
Additional options may be set in the compile context via the
|
||||
.\" HREF
|
||||
\fBpcre2_set_compile_extra_options\fP
|
||||
\fBpcre2_set_compile_extra_options\fP
|
||||
.\"
|
||||
function.
|
||||
.P
|
||||
|
|
|
@ -40,7 +40,7 @@ characters. The options are:
|
|||
.sp
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
On success, make a private subject copy
|
||||
On success, make a private subject copy
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject is not the end of a line
|
||||
|
|
|
@ -49,7 +49,7 @@ terminated by a binary zero code unit. The options are:
|
|||
.sp
|
||||
PCRE2_ANCHORED Match only at the first position
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
On success, make a private subject copy
|
||||
On success, make a private subject copy
|
||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||
PCRE2_NOTEOL Subject string is not the end of a line
|
||||
|
|
|
@ -18,7 +18,7 @@ If \fImatch_data\fP is NULL, this function does nothing. Otherwise,
|
|||
using the memory freeing function from the general context or compiled pattern
|
||||
with which it was created, or \fBfree()\fP if that was not set.
|
||||
.P
|
||||
If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
|
||||
If the PCRE2_COPY_MATCHED_SUBJECT was used for a successful match using this
|
||||
match data block, the copy of the subject that was remembered with the block is
|
||||
also freed.
|
||||
.P
|
||||
|
|
|
@ -23,7 +23,7 @@ options are:
|
|||
in UTF-8 and UTF-32 modes
|
||||
.\" JOIN
|
||||
PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and \ex
|
||||
handling
|
||||
handling
|
||||
.\" JOIN
|
||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as
|
||||
a literal following character
|
||||
|
|
|
@ -247,7 +247,7 @@ document for an overview of all the PCRE2 documentation.
|
|||
.sp
|
||||
.B const unsigned char *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
|
||||
.sp
|
||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
|
||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
|
||||
.B " void *\fIwhere\fP);"
|
||||
.sp
|
||||
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||
|
@ -1244,7 +1244,7 @@ until after all operations on the
|
|||
.\" </a>
|
||||
match data block
|
||||
.\"
|
||||
have taken place, unless, in the case of the subject string, you have used the
|
||||
have taken place, unless, in the case of the subject string, you have used the
|
||||
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
||||
"Option bits for \fBpcre2_match()\fP"
|
||||
.\" HTML <a href="#matchoptions">
|
||||
|
@ -1375,8 +1375,8 @@ the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
|
|||
.\" </a>
|
||||
below).
|
||||
.\"
|
||||
Note that this alternative escape handling applies only to patterns. Neither of
|
||||
these options affects the processing of replacement strings passed to
|
||||
Note that this alternative escape handling applies only to patterns. Neither of
|
||||
these options affects the processing of replacement strings passed to
|
||||
\fBpcre2_substitute()\fP.
|
||||
.sp
|
||||
PCRE2_ALT_CIRCUMFLEX
|
||||
|
@ -1832,10 +1832,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
|||
.sp
|
||||
PCRE2_EXTRA_ALT_BSUX
|
||||
.sp
|
||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in
|
||||
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
||||
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||
PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
|
||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in
|
||||
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
||||
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||
PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
|
||||
character code, where hhh.. is any number of hexadecimal digits.
|
||||
.sp
|
||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
||||
|
@ -1852,7 +1852,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
|
|||
\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are
|
||||
treated as single-character escapes. For example, \ej is a literal "j" and
|
||||
\ex{2z} is treated as the literal string "x{2z}". Setting this option means
|
||||
that typos in patterns may go undetected and have unexpected results. Also note
|
||||
that typos in patterns may go undetected and have unexpected results. Also note
|
||||
that a sequence such as [\eN{] is interpreted as a malformed attempt at
|
||||
[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an
|
||||
unqualified \eN is a valid escape sequence but is not supported in a character
|
||||
|
@ -1860,9 +1860,9 @@ class. To reiterate: this is a dangerous option. Use with great care.
|
|||
.sp
|
||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
.sp
|
||||
There are some legacy applications where the escape sequence \er in a pattern
|
||||
is expected to match a newline. If this option is set, \er in a pattern is
|
||||
converted to \en so that it matches a LF (linefeed) instead of a CR (carriage
|
||||
There are some legacy applications where the escape sequence \er in a pattern
|
||||
is expected to match a newline. If this option is set, \er in a pattern is
|
||||
converted to \en so that it matches a LF (linefeed) instead of a CR (carriage
|
||||
return) character. The option does not affect a literal CR in the pattern, nor
|
||||
does it affect CR specified as an explicit code point such as \ex{0D}.
|
||||
.sp
|
||||
|
@ -2547,7 +2547,7 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA.
|
|||
.rs
|
||||
.sp
|
||||
The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
||||
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
||||
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
||||
|
@ -2567,8 +2567,8 @@ matching.
|
|||
.sp
|
||||
PCRE2_COPY_MATCHED_SUBJECT
|
||||
.sp
|
||||
By default, a pointer to the subject is remembered in the match data block so
|
||||
that, after a successful match, it can be referenced by the substring
|
||||
By default, a pointer to the subject is remembered in the match data block so
|
||||
that, after a successful match, it can be referenced by the substring
|
||||
extraction functions. This means that the subject's memory must not be freed
|
||||
until all such operations are complete. For some applications where the
|
||||
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
||||
|
@ -2868,8 +2868,8 @@ undefined.
|
|||
.P
|
||||
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
||||
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
||||
\fBpcre2_get_mark()\fP can be called to access this name, which can be
|
||||
specified in the pattern by any of the backtracking control verbs, not just
|
||||
\fBpcre2_get_mark()\fP can be called to access this name, which can be
|
||||
specified in the pattern by any of the backtracking control verbs, not just
|
||||
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
||||
the zero-terminated name, which is within the compiled pattern. If no name is
|
||||
available, NULL is returned. The length of the name (excluding the terminating
|
||||
|
@ -3016,7 +3016,7 @@ The backtracking match limit was reached.
|
|||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
||||
.sp
|
||||
PCRE2_ERROR_NULL
|
||||
|
@ -3407,7 +3407,7 @@ capture groups and letters within \eQ...\eE quoted sequences.
|
|||
.P
|
||||
Note that case forcing sequences such as \eU...\eE do not nest. For example,
|
||||
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
|
||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||
not apply to replacement strings.
|
||||
.P
|
||||
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||
|
@ -3439,7 +3439,7 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
|
|||
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
|
||||
groups in the extended syntax forms to be treated as unset.
|
||||
.P
|
||||
If successful, \fBpcre2_substitute()\fP returns the number of successful
|
||||
If successful, \fBpcre2_substitute()\fP returns the number of successful
|
||||
matches. This may be zero if no matches were found, and is never greater than 1
|
||||
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||
.P
|
||||
|
@ -3487,8 +3487,8 @@ above).
|
|||
.sp
|
||||
The \fBpcre2_set_substitution_callout()\fP function can be used to specify a
|
||||
callout function for \fBpcre2_substitute()\fP. This information is passed in
|
||||
a match context. The callout function is called after each substitution has
|
||||
been processed, but it can cause the replacement not to happen. The callout
|
||||
a match context. The callout function is called after each substitution has
|
||||
been processed, but it can cause the replacement not to happen. The callout
|
||||
function is not called for simulated substitutions that happen as a result of
|
||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||
.P
|
||||
|
@ -3497,10 +3497,10 @@ block structure, which contains the following fields, not necessarily in this
|
|||
order:
|
||||
.sp
|
||||
uint32_t \fIversion\fP;
|
||||
uint32_t \fIsubscount\fP;
|
||||
uint32_t \fIsubscount\fP;
|
||||
PCRE2_SPTR \fIinput\fP;
|
||||
PCRE2_SPTR \fIoutput\fP;
|
||||
PCRE2_SIZE \fI*ovector\fP;
|
||||
PCRE2_SPTR \fIoutput\fP;
|
||||
PCRE2_SIZE \fI*ovector\fP;
|
||||
uint32_t \fIoveccount\fP;
|
||||
PCRE2_SIZE \fIoutput_offsets[2]\fP;
|
||||
.sp
|
||||
|
@ -3512,9 +3512,9 @@ The \fIsubscount\fP field is the number of the current match. It is 1 for the
|
|||
first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP
|
||||
pointers are copies of the values passed to \fBpcre2_substitute()\fP.
|
||||
.P
|
||||
The \fIovector\fP field points to the ovector, which contains the result of the
|
||||
most recent match. The \fIoveccount\fP field contains the number of pairs that
|
||||
are set in the ovector, and is always greater than zero.
|
||||
The \fIovector\fP field points to the ovector, which contains the result of the
|
||||
most recent match. The \fIoveccount\fP field contains the number of pairs that
|
||||
are set in the ovector, and is always greater than zero.
|
||||
.P
|
||||
The \fIoutput_offsets\fP vector contains the offsets of the replacement in the
|
||||
output string. This has already been processed for dollar and (if requested)
|
||||
|
|
|
@ -33,7 +33,7 @@ is described in the
|
|||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
documentation; the rest of this document is concerned with callouts during
|
||||
pattern matching.
|
||||
.P
|
||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||
|
|
|
@ -778,8 +778,8 @@ only callouts with string arguments are useful.
|
|||
.SS "Calling external programs or scripts"
|
||||
.rs
|
||||
.sp
|
||||
This facility can be independently disabled when \fBpcre2grep\fP is built. It
|
||||
is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
|
||||
This facility can be independently disabled when \fBpcre2grep\fP is built. It
|
||||
is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
|
||||
where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
|
||||
\fBfork()\fP and \fBexecv()\fP are available.
|
||||
.P
|
||||
|
|
|
@ -390,12 +390,12 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed
|
|||
by { is not recognized. Only if \ex is followed by two hexadecimal digits is it
|
||||
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
||||
character. In this mode, support for code points greater than 256 is provided
|
||||
by \eu, which must be followed by four hexadecimal digits; otherwise it is
|
||||
by \eu, which must be followed by four hexadecimal digits; otherwise it is
|
||||
interpreted as a literal "u" character.
|
||||
.P
|
||||
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
||||
\eu{hhh..} is recognized as the character specified by hexadecimal code point.
|
||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||
6.
|
||||
.P
|
||||
The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
|
||||
|
@ -1188,7 +1188,7 @@ character. If any other of these assertions appears in a character class, an
|
|||
A word boundary is a position in the subject string where the current character
|
||||
and the previous character do not both match \ew or \eW (i.e. one matches
|
||||
\ew and the other matches \eW), or the start or end of the string if the
|
||||
first or last character matches \ew, respectively. When PCRE2 is built with
|
||||
first or last character matches \ew, respectively. When PCRE2 is built with
|
||||
Unicode support, the meanings of \ew and \eW can be changed by setting the
|
||||
PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2
|
||||
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
||||
|
|
|
@ -29,12 +29,12 @@ and 32-bit libraries. See the
|
|||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation for a description of PCRE2's native API, which contains much
|
||||
additional functionality.
|
||||
additional functionality.
|
||||
.P
|
||||
The functions described here are wrapper functions that ultimately call the
|
||||
PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
|
||||
file, and they all have unique names starting with \fBpcre2_\fP. However, the
|
||||
\fBpcre2posix.h\fP header also contains macro definitions that convert the
|
||||
\fBpcre2posix.h\fP header also contains macro definitions that convert the
|
||||
standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
|
||||
means that a program can use the usual POSIX names without running the risk of
|
||||
accidentally linking with POSIX functions from a different library.
|
||||
|
@ -44,7 +44,7 @@ can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
|
|||
application. Because the POSIX functions call the native ones, it is also
|
||||
necessary to add \fB-lpcre2-8\fP.
|
||||
.P
|
||||
Although they are not defined as prototypes in \fBpcre2posix.h\fP, the library
|
||||
Although they are not defined as prototypes in \fBpcre2posix.h\fP, the library
|
||||
does contain functions with the POSIX names \fBregcomp()\fP etc. These simply
|
||||
pass their arguments to the PCRE2 functions. These functions are provided for
|
||||
backwards compatibility with earlier versions of PCRE2, so that existing
|
||||
|
|
|
@ -22,7 +22,7 @@ documentation. This document contains a quick-reference summary of the syntax.
|
|||
.SH "ESCAPED CHARACTERS"
|
||||
.rs
|
||||
.sp
|
||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||
sequence causes an error.
|
||||
.sp
|
||||
\ea alarm, that is, the BEL character (hex 07)
|
||||
|
@ -49,7 +49,7 @@ following are also recognized:
|
|||
When \ex is not followed by {, from zero to two hexadecimal digits are read,
|
||||
but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be
|
||||
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
||||
Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||
Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
||||
matches a literal "u".
|
||||
.P
|
||||
|
|
|
@ -565,10 +565,10 @@ for a description of the effects of these options.
|
|||
/s dotall set PCRE2_DOTALL
|
||||
dupnames set PCRE2_DUPNAMES
|
||||
endanchored set PCRE2_ENDANCHORED
|
||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||
/x extended set PCRE2_EXTENDED
|
||||
/xx extended_more set PCRE2_EXTENDED_MORE
|
||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||
firstline set PCRE2_FIRSTLINE
|
||||
literal set PCRE2_LITERAL
|
||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||
|
@ -1005,7 +1005,7 @@ process.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text
|
||||
altglobal alternative global matching
|
||||
/g global global matching
|
||||
|
@ -1013,9 +1013,9 @@ process.
|
|||
mark show mark values
|
||||
replace=<string> specify a replacement string
|
||||
startchar show starting character when relevant
|
||||
substitute_callout use substitution callouts
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1160,7 +1160,7 @@ pattern.
|
|||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
allcaptures show all captures
|
||||
allvector show the entire ovector
|
||||
allvector show the entire ovector
|
||||
allusedtext show all consulted text (non-JIT only)
|
||||
altglobal alternative global matching
|
||||
callout_capture show captures at callout time
|
||||
|
@ -1190,9 +1190,9 @@ pattern.
|
|||
replace=<string> specify a replacement string
|
||||
startchar show startchar when relevant
|
||||
startoffset=<n> same as offset=<n>
|
||||
substitute_callout use substitution callouts
|
||||
substitute_callout use substitution callouts
|
||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_skip=<n> skip substitution number n
|
||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||
substitute_stop=<n> skip substitution number n and greater
|
||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||
|
@ -1273,9 +1273,9 @@ result, and also for DFA matching, provides a means of checking that there are
|
|||
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||
ovector is filled with a special value, and if this is found in both elements
|
||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
applies to all groups after the maximum capture group for the pattern. In other
|
||||
cases it applies to the entire ovector. After a partial match, the first two
|
||||
elements are the only ones that should be set. After a DFA match, the amount of
|
||||
ovector that is used depends on the number of matches that were found.
|
||||
.
|
||||
.
|
||||
|
@ -1288,13 +1288,13 @@ controlled by various modifiers listed above whose names begin with
|
|||
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
||||
.\" HTML <a href="#callouts">
|
||||
.\" </a>
|
||||
below.
|
||||
below.
|
||||
.\"
|
||||
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
||||
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
||||
"Testing the substitution function"
|
||||
.\" HTML <a href="#substitution">
|
||||
.\" </a>
|
||||
below.
|
||||
below.
|
||||
.\"
|
||||
.
|
||||
.
|
||||
|
@ -1416,14 +1416,14 @@ matching provokes an error return ("bad option value") from
|
|||
.SS "Testing substitute callouts"
|
||||
.rs
|
||||
.sp
|
||||
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
||||
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
||||
function is set up. When it is called (after each substitution), details of the
|
||||
input and output strings are output. For example:
|
||||
.sp
|
||||
/abc/g,replace=<$0>,substitute_callout
|
||||
abcdefabcpqr
|
||||
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||
2: <abc>def<abc>pqr
|
||||
.sp
|
||||
The first number on each callout line is the count of matches. The
|
||||
|
@ -1432,11 +1432,11 @@ is, one more than the number of capturing groups that were set). Then are
|
|||
listed the offsets of the old substring, its contents, and the same for the
|
||||
replacement.
|
||||
.P
|
||||
By default, the substitution callout function returns zero, which accepts the
|
||||
replacement and causes matching to continue if /g was used. Two further
|
||||
modifiers can be used to test other return values. If \fBsubstitute_skip\fP is
|
||||
set to a value greater than zero the callout function returns +1 for the match
|
||||
of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the
|
||||
By default, the substitution callout function returns zero, which accepts the
|
||||
replacement and causes matching to continue if /g was used. Two further
|
||||
modifiers can be used to test other return values. If \fBsubstitute_skip\fP is
|
||||
set to a value greater than zero the callout function returns +1 for the match
|
||||
of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the
|
||||
replacement to be rejected, and -1 causes no further matching to take place. If
|
||||
either of them is set, \fBsubstitute_callout\fP is assumed. For example:
|
||||
.sp
|
||||
|
@ -1449,7 +1449,7 @@ either of them are set, \fBsubstitute_callout\fP is assumed. For example:
|
|||
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
||||
1: abcdefabcpqr
|
||||
.sp
|
||||
If both are set for the same number, stop takes precedence. Only a single skip
|
||||
If both are set for the same number, stop takes precedence. Only a single skip
|
||||
or stop is supported, which is sufficient for testing that the feature works.
|
||||
.
|
||||
.
|
||||
|
|
|
@ -72,7 +72,7 @@ characters (see the description of \eC in the
|
|||
.\" HREF
|
||||
\fBpcre2pattern\fP
|
||||
.\"
|
||||
documentation). For this reason, there is a build-time option that disables
|
||||
documentation). For this reason, there is a build-time option that disables
|
||||
support for \eC completely. There is also a less draconian compile-time option
|
||||
for locking out the use of \eC when a pattern is compiled.
|
||||
.P
|
||||
|
@ -135,13 +135,13 @@ characters that are all from the same Unicode script. However, because some
|
|||
scripts are commonly used together, and because some diacritical and other
|
||||
marks are used with multiple scripts, it is not that simple.
|
||||
.P
|
||||
Every Unicode character has a Script property, mostly with a value
|
||||
Every Unicode character has a Script property, mostly with a value
|
||||
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
||||
are also three special values:
|
||||
.P
|
||||
"Unknown" is used for code points that have not been assigned, and also for the
|
||||
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||
only in non-UTF mode, are assigned the Unknown script.
|
||||
.P
|
||||
"Common" is used for characters that are used with many scripts. These include
|
||||
|
@ -152,19 +152,19 @@ digits 0 to 9.
|
|||
previous character. These are considered to take on the script of the character
|
||||
that they modify.
|
||||
.P
|
||||
Some Inherited characters are used with many scripts, but many of them are only
|
||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||
possible to check this, a Unicode property called Script Extension exists. Its
|
||||
value is a list of scripts that apply to the character. For the majority of
|
||||
Some Inherited characters are used with many scripts, but many of them are only
|
||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||
possible to check this, a Unicode property called Script Extension exists. Its
|
||||
value is a list of scripts that apply to the character. For the majority of
|
||||
characters, the list contains just one script, the same one as the Script
|
||||
property. However, for characters such as U+102E0 more than one Script is
|
||||
listed. There are also some Common characters that have a single, non-Common
|
||||
script in their Script Extension list.
|
||||
.P
|
||||
The next section describes the basic rules for deciding whether a given string
|
||||
of characters is a script run. Note, however, that there are some special cases
|
||||
involving the Chinese Han script, and an additional constraint for decimal
|
||||
The next section describes the basic rules for deciding whether a given string
|
||||
of characters is a script run. Note, however, that there are some special cases
|
||||
involving the Chinese Han script, and an additional constraint for decimal
|
||||
digits. These are covered in subsequent sections.
|
||||
.
|
||||
.
|
||||
|
@ -185,16 +185,16 @@ all the sets of scripts must not be empty.
|
|||
.P
|
||||
A simple example is an Internet name such as "google.com". The letters are all
|
||||
in the Latin script, and the dot is Common, so this string is a script run.
|
||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||
string that looks the same, but with Cyrillic "o"s is not a script run.
|
||||
.P
|
||||
More interesting examples involve characters with more than one script in their
|
||||
More interesting examples involve characters with more than one script in their
|
||||
Script Extension. Consider the following characters:
|
||||
.sp
|
||||
U+060C Arabic comma
|
||||
U+06D4 Arabic full stop
|
||||
.sp
|
||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
||||
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
||||
appear in Syriac or Thaana script runs, but the second could not.
|
||||
|
@ -202,9 +202,9 @@ appear in Syriac or Thaana script runs, but the second could not.
|
|||
.
|
||||
.SS "The Chinese Han script"
|
||||
.rs
|
||||
.sp
|
||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||
.sp
|
||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
||||
and Han. These three combinations are treated as special cases when checking
|
||||
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
||||
|
|
|
@ -29,7 +29,7 @@ if [ $# -gt 1 -a "$1" = "-perl" ] ; then
|
|||
shift
|
||||
perl=$1
|
||||
shift
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ $# -gt 0 -a "$1" = "-w" ] ; then
|
||||
perlarg="-w"
|
||||
|
@ -386,10 +386,10 @@ for (;;)
|
|||
}
|
||||
}
|
||||
|
||||
# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode
|
||||
# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode
|
||||
# "main::OUTFILE" used only once".
|
||||
|
||||
close(OUTFILE) if $outfile eq "OUTFILE";
|
||||
close(OUTFILE) if $outfile eq "OUTFILE";
|
||||
|
||||
PERLEND
|
||||
) | $perl $perlarg - $@
|
||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 33
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2018-09-14
|
||||
#define PCRE2_DATE 2019-03-03
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -150,6 +150,7 @@ D is inspected during pcre2_dfa_match() execution
|
|||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||
|
||||
/* These are for pcre2_jit_compile(). */
|
||||
|
||||
|
|
|
@ -604,15 +604,15 @@ for(;;)
|
|||
case OP_SCBRAPOS:
|
||||
if (cb->had_recurse) return FALSE;
|
||||
break;
|
||||
|
||||
|
||||
/* A script run might have to backtrack if the iterated item can match
|
||||
characters from more than one script. So give up unless repeating an
|
||||
characters from more than one script. So give up unless repeating an
|
||||
explicit character. */
|
||||
|
||||
|
||||
case OP_SCRIPT_RUN:
|
||||
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
||||
return FALSE;
|
||||
break;
|
||||
return FALSE;
|
||||
break;
|
||||
|
||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||
last iterator. However, if the group was entered as a result of checking
|
||||
|
|
|
@ -407,7 +407,7 @@ return 0;
|
|||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||
void *substitute_callout_data)
|
||||
{
|
||||
mcontext->substitute_callout = substitute_callout;
|
||||
|
|
|
@ -182,8 +182,8 @@ static const unsigned char compile_error_texts[] =
|
|||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||
"invalid hyphen in option setting\0"
|
||||
/* 95 */
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
"(*alpha_assertion) not recognized\0"
|
||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -525,10 +525,10 @@ bytes in a code unit in that mode. */
|
|||
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||
|
||||
|
||||
/* Values for the flags field in a match data block. */
|
||||
|
||||
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
|
@ -1774,7 +1774,7 @@ typedef struct {
|
|||
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||
int32_t other_case; /* offset to other case, or zero if none */
|
||||
int16_t scriptx; /* script extension value */
|
||||
int16_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||
int16_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||
} ucd_record;
|
||||
|
||||
/* UCD access macros */
|
||||
|
|
|
@ -7794,12 +7794,12 @@ if (needstype || needsscript)
|
|||
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||
|
||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
|
||||
|
||||
|
||||
// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
|
||||
|
||||
ccbegin = cc;
|
||||
|
@ -7848,7 +7848,7 @@ if (needstype || needsscript)
|
|||
//fprintf(stderr, "~~C\n");
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
|
@ -7862,12 +7862,12 @@ if (needstype || needsscript)
|
|||
// PH hacking
|
||||
//fprintf(stderr, "~~D\n");
|
||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
|
||||
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||
|
||||
|
||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||
typereg = RETURN_ADDR;
|
||||
}
|
||||
|
@ -9207,9 +9207,9 @@ if (common->utf && *cc == OP_REFI)
|
|||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||
|
||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||
|
||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
|
||||
|
||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
|
||||
|
|
|
@ -138,7 +138,7 @@ for (i = 0; i < 256; i++)
|
|||
int x = 0;
|
||||
if (isspace(i)) x += ctype_space;
|
||||
if (isalpha(i)) x += ctype_letter;
|
||||
if (islower(i)) x += ctype_lcletter;
|
||||
if (islower(i)) x += ctype_lcletter;
|
||||
if (isdigit(i)) x += ctype_digit;
|
||||
if (isalnum(i) || i == '_') x += ctype_word;
|
||||
*p++ = x;
|
||||
|
|
|
@ -96,10 +96,10 @@ pcre2_match_data_free(pcre2_match_data *match_data)
|
|||
if (match_data != NULL)
|
||||
{
|
||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.free((void *)match_data->subject,
|
||||
match_data->memctl.memory_data);
|
||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -393,7 +393,7 @@ for(;;)
|
|||
case OP_ASSERTBACK:
|
||||
case OP_ASSERTBACK_NOT:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_COND:
|
||||
case OP_SCOND:
|
||||
case OP_REVERSE:
|
||||
|
|
|
@ -171,7 +171,7 @@ for (;;)
|
|||
/* Fall through */
|
||||
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SBRA:
|
||||
case OP_BRAPOS:
|
||||
case OP_SBRAPOS:
|
||||
|
@ -1076,7 +1076,7 @@ do
|
|||
case OP_CBRAPOS:
|
||||
case OP_SCBRAPOS:
|
||||
case OP_ONCE:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_ASSERT:
|
||||
rc = set_start_bits(re, tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
*************************************************/
|
||||
|
||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||
and semantics are as close as possible to those of the Perl 5 language. This is
|
||||
the public header file to be #included by applications that call PCRE2 via the
|
||||
and semantics are as close as possible to those of the Perl 5 language. This is
|
||||
the public header file to be #included by applications that call PCRE2 via the
|
||||
POSIX wrapper interface.
|
||||
|
||||
Written by Philip Hazel
|
||||
|
@ -138,7 +138,7 @@ file. */
|
|||
# endif
|
||||
#endif
|
||||
|
||||
/* The functions. The actual code is in functions with pcre2_xxx names for
|
||||
/* The functions. The actual code is in functions with pcre2_xxx names for
|
||||
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
|
||||
regex functions. It's done this way to ensure that they are always linked from
|
||||
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
|
||||
|
@ -155,7 +155,7 @@ PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *);
|
|||
#define regerror pcre2_regerror
|
||||
#define regfree pcre2_regfree
|
||||
|
||||
/* Debian had a patch that used different names. These are now here to save
|
||||
/* Debian had a patch that used different names. These are now here to save
|
||||
them having to maintain their own patch, but are not documented by PCRE2. */
|
||||
|
||||
#define PCRE2regcomp pcre2_regcomp
|
||||
|
|
Loading…
Reference in New Issue