More file tidies for 10.33-RC1
This commit is contained in:
parent
02ff543f9c
commit
7375089fa5
|
@ -88,7 +88,7 @@
|
||||||
|
|
||||||
PROJECT(PCRE2 C)
|
PROJECT(PCRE2 C)
|
||||||
|
|
||||||
# Increased minimum to 2.8.0 to support newer add_test features.
|
# Increased minimum to 2.8.0 to support newer add_test features.
|
||||||
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0)
|
||||||
|
|
||||||
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
# Set policy CMP0026 to avoid warnings for the use of LOCATION in
|
||||||
|
@ -324,7 +324,7 @@ ENDIF(PCRE2_SUPPORT_VALGRIND)
|
||||||
|
|
||||||
IF(PCRE2_DISABLE_PERCENT_ZT)
|
IF(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
SET(DISABLE_PERCENT_ZT 1)
|
SET(DISABLE_PERCENT_ZT 1)
|
||||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
|
|
||||||
# This next one used to reference ${READLINE_LIBRARY}
|
# This next one used to reference ${READLINE_LIBRARY}
|
||||||
# but I was advised to add the NCURSES test as well, along with
|
# but I was advised to add the NCURSES test as well, along with
|
||||||
|
@ -459,7 +459,7 @@ SET(PCRE2_SOURCES
|
||||||
src/pcre2_newline.c
|
src/pcre2_newline.c
|
||||||
src/pcre2_ord2utf.c
|
src/pcre2_ord2utf.c
|
||||||
src/pcre2_pattern_info.c
|
src/pcre2_pattern_info.c
|
||||||
src/pcre2_script_run.c
|
src/pcre2_script_run.c
|
||||||
src/pcre2_serialize.c
|
src/pcre2_serialize.c
|
||||||
src/pcre2_string_utils.c
|
src/pcre2_string_utils.c
|
||||||
src/pcre2_study.c
|
src/pcre2_study.c
|
||||||
|
@ -651,10 +651,10 @@ IF(PCRE2_BUILD_TESTS)
|
||||||
|
|
||||||
# exes in Debug location tested by the RunTest and RunGrepTest shell scripts
|
# exes in Debug location tested by the RunTest and RunGrepTest shell scripts
|
||||||
# via "make test"
|
# via "make test"
|
||||||
|
|
||||||
# The commented out code below provokes a warning about future removal
|
# The commented out code below provokes a warning about future removal
|
||||||
# of the facility, and requires policy CMP0026 to be set to "OLD". I have
|
# of the facility, and requires policy CMP0026 to be set to "OLD". I have
|
||||||
# got fed-up with the warnings, but my plea for help on the mailing list
|
# got fed-up with the warnings, but my plea for help on the mailing list
|
||||||
# produced no response. So, I've hacked. The new code below seems to work on
|
# produced no response. So, I've hacked. The new code below seems to work on
|
||||||
# Linux.
|
# Linux.
|
||||||
|
|
||||||
|
@ -857,9 +857,9 @@ IF(PCRE2_SHOW_REPORT)
|
||||||
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
|
MESSAGE(STATUS " Support Valgrind .................: ${PCRE2_SUPPORT_VALGRIND}")
|
||||||
IF(PCRE2_DISABLE_PERCENT_ZT)
|
IF(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
MESSAGE(STATUS " Use %zu and %td ..................: OFF" )
|
MESSAGE(STATUS " Use %zu and %td ..................: OFF" )
|
||||||
ELSE(PCRE2_DISABLE_PERCENT_ZT)
|
ELSE(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
MESSAGE(STATUS " Use %zu and %td ..................: AUTO" )
|
MESSAGE(STATUS " Use %zu and %td ..................: AUTO" )
|
||||||
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
ENDIF(PCRE2_DISABLE_PERCENT_ZT)
|
||||||
|
|
||||||
IF(MINGW AND NOT PCRE2_STATIC)
|
IF(MINGW AND NOT PCRE2_STATIC)
|
||||||
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
MESSAGE(STATUS " Non-standard dll names (prefix) . : ${NON_STANDARD_LIB_PREFIX}")
|
||||||
|
|
86
ChangeLog
86
ChangeLog
|
@ -14,11 +14,11 @@ a greater than 1 fixed quantifier. This issue was found by Yunho Kim.
|
||||||
|
|
||||||
3. Added support for callouts from pcre2_substitute().
|
3. Added support for callouts from pcre2_substitute().
|
||||||
|
|
||||||
4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
|
4. The POSIX functions are now all called pcre2_regcomp() etc., with wrapper
|
||||||
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
|
functions that use the standard POSIX names. However, in pcre2posix.h the POSIX
|
||||||
names are defined as macros. This should help avoid linking with the wrong
|
names are defined as macros. This should help avoid linking with the wrong
|
||||||
library in some environments while still exporting the POSIX names for
|
library in some environments while still exporting the POSIX names for
|
||||||
pre-existing programs that use them. (The Debian alternative names are also
|
pre-existing programs that use them. (The Debian alternative names are also
|
||||||
defined as macros, but not documented.)
|
defined as macros, but not documented.)
|
||||||
|
|
||||||
5. Fix an xclass matching issue in JIT.
|
5. Fix an xclass matching issue in JIT.
|
||||||
|
@ -33,29 +33,29 @@ new "is lower case letter" bit. At the same time, the now unused "is
|
||||||
hexadecimal digit" bit was removed. The default tables in
|
hexadecimal digit" bit was removed. The default tables in
|
||||||
src/pcre2_chartables.c.dist are updated.
|
src/pcre2_chartables.c.dist are updated.
|
||||||
|
|
||||||
8. Implement the new Perl "script run" features (*script_run:...) and
|
8. Implement the new Perl "script run" features (*script_run:...) and
|
||||||
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
|
(*atomic_script_run:...) aka (*sr:...) and (*asr:...).
|
||||||
|
|
||||||
9. Fixed two typos in change 22 for 10.21, which added special handling for
|
9. Fixed two typos in change 22 for 10.21, which added special handling for
|
||||||
ranges such as a-z in EBCDIC environments. The original code probably never
|
ranges such as a-z in EBCDIC environments. The original code probably never
|
||||||
worked, though there were no bug reports.
|
worked, though there were no bug reports.
|
||||||
|
|
||||||
10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via
|
10. Implement PCRE2_COPY_MATCHED_SUBJECT for pcre2_match() (including JIT via
|
||||||
pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast
|
pcre2_match()) and pcre2_dfa_match(), but *not* the pcre2_jit_match() fast
|
||||||
path. Also, when a match fails, set the subject field in the match data to NULL
|
path. Also, when a match fails, set the subject field in the match data to NULL
|
||||||
for tidiness - none of the substring extractors should reference this after
|
for tidiness - none of the substring extractors should reference this after
|
||||||
match failure.
|
match failure.
|
||||||
|
|
||||||
11. If a pattern started with a subroutine call that had a quantifier with a
|
11. If a pattern started with a subroutine call that had a quantifier with a
|
||||||
minimum of zero, an incorrect "match must start with this character" could be
|
minimum of zero, an incorrect "match must start with this character" could be
|
||||||
recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to
|
recorded. Example: /(?&xxx)*ABC(?<xxx>XYZ)/ would (incorrectly) expect 'A' to
|
||||||
be the first character of a match.
|
be the first character of a match.
|
||||||
|
|
||||||
12. The heap limit checking code in pcre2_dfa_match() could suffer from
|
12. The heap limit checking code in pcre2_dfa_match() could suffer from
|
||||||
overflow if the heap limit was set very large. This could cause incorrect "heap
|
overflow if the heap limit was set very large. This could cause incorrect "heap
|
||||||
limit exceeded" errors.
|
limit exceeded" errors.
|
||||||
|
|
||||||
13. Add "kibibytes" to the heap limit output from pcre2test -C to make the
|
13. Add "kibibytes" to the heap limit output from pcre2test -C to make the
|
||||||
units clear.
|
units clear.
|
||||||
|
|
||||||
14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness.
|
14. Add a call to pcre2_jit_free_unused_memory() in pcre2grep, for tidiness.
|
||||||
|
@ -71,33 +71,33 @@ inttypes.h. This supports environments that do not have stdint.h but do have
|
||||||
inttypes.h, which are known to exist. A note in the autotools documentation
|
inttypes.h, which are known to exist. A note in the autotools documentation
|
||||||
says (November 2018) that there are none known that are the other way round.
|
says (November 2018) that there are none known that are the other way round.
|
||||||
|
|
||||||
17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to
|
17. Added --disable-percent-zt to "configure" (and equivalent to CMake) to
|
||||||
forcibly disable the use of %zu and %td in formatting strings because there is
|
forcibly disable the use of %zu and %td in formatting strings because there is
|
||||||
at least one version of VMS that claims to be C99 but does not support these
|
at least one version of VMS that claims to be C99 but does not support these
|
||||||
modifiers.
|
modifiers.
|
||||||
|
|
||||||
18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
|
18. Added --disable-pcre2grep-callout-fork, which restricts the callout support
|
||||||
in pcre2grep to the inbuilt echo facility. This may be useful in environments
|
in pcre2grep to the inbuilt echo facility. This may be useful in environments
|
||||||
that do not support fork().
|
that do not support fork().
|
||||||
|
|
||||||
19. Fix two instances of <= 0 being applied to unsigned integers (the VMS
|
19. Fix two instances of <= 0 being applied to unsigned integers (the VMS
|
||||||
compiler complains).
|
compiler complains).
|
||||||
|
|
||||||
20. Added "fork" support for VMS to pcre2grep, for running an external program
|
20. Added "fork" support for VMS to pcre2grep, for running an external program
|
||||||
via a string callout.
|
via a string callout.
|
||||||
|
|
||||||
21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel.
|
21. Improve MAP_JIT flag usage on MacOS. Patch by Rich Siegel.
|
||||||
|
|
||||||
22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN)
|
22. If a pattern started with (*MARK), (*COMMIT), (*PRUNE), (*SKIP), or (*THEN)
|
||||||
followed by ^ it was not recognized as anchored.
|
followed by ^ it was not recognized as anchored.
|
||||||
|
|
||||||
23. The RunGrepTest script used to cut out the test of NUL characters for
|
23. The RunGrepTest script used to cut out the test of NUL characters for
|
||||||
Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD
|
Solaris and MacOS as printf and sed can't handle them. It seems that the *BSD
|
||||||
systems can't either. I've inverted the test so that only those OS that are
|
systems can't either. I've inverted the test so that only those OS that are
|
||||||
known to work (currently only Linux) try to run this test.
|
known to work (currently only Linux) try to run this test.
|
||||||
|
|
||||||
24. Some tests in RunGrepTest appended to testtrygrep from two different file
|
24. Some tests in RunGrepTest appended to testtrygrep from two different file
|
||||||
descriptors instead of redirecting stderr to stdout. This worked on Linux, but
|
descriptors instead of redirecting stderr to stdout. This worked on Linux, but
|
||||||
it was reported not to on other systems, causing the tests to fail.
|
it was reported not to on other systems, causing the tests to fail.
|
||||||
|
|
||||||
25. In the RunTest script, make the test for stack setting use the same value
|
25. In the RunTest script, make the test for stack setting use the same value
|
||||||
|
@ -105,27 +105,27 @@ for the stack as it needs for -bigstack.
|
||||||
|
|
||||||
26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning.
|
26. Insert a cast in pcre2_dfa_match.c to suppress a compiler warning.
|
||||||
|
|
||||||
26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s
|
26. With PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL set, escape sequences such as \s
|
||||||
which are valid in character classes, but not as the end of ranges, were being
|
which are valid in character classes, but not as the end of ranges, were being
|
||||||
treated as literals. An example is [_-\s] (but not [\s-_] because that gave an
|
treated as literals. An example is [_-\s] (but not [\s-_] because that gave an
|
||||||
error at the *start* of a range). Now an "invalid range" error is given
|
error at the *start* of a range). Now an "invalid range" error is given
|
||||||
independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
independently of PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL.
|
||||||
|
|
||||||
27. Related to 26 above, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL was affecting known escape
|
27. Related to 26 above, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL was affecting known escape
|
||||||
sequences such as \eX when they appeared invalidly in a character class. Now
|
sequences such as \eX when they appeared invalidly in a character class. Now
|
||||||
the option applies only to unrecognized or malformed escape sequences.
|
the option applies only to unrecognized or malformed escape sequences.
|
||||||
|
|
||||||
28. Fix word boundary in JIT compiler. Patch by Mike Munday.
|
28. Fix word boundary in JIT compiler. Patch by Mike Munday.
|
||||||
|
|
||||||
29. The pcre2_dfa_match() function was incorrectly handling conditional version
|
29. The pcre2_dfa_match() function was incorrectly handling conditional version
|
||||||
tests such as (?(VERSION>=0)...) when the version test was true. Incorrect
|
tests such as (?(VERSION>=0)...) when the version test was true. Incorrect
|
||||||
processing or a crash could result.
|
processing or a crash could result.
|
||||||
|
|
||||||
30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group
|
30. When PCRE2_UTF is set, allow non-ASCII letters and decimal digits in group
|
||||||
names, as Perl does. There was a small bug in this new code, found by
|
names, as Perl does. There was a small bug in this new code, found by
|
||||||
ClusterFuzz 12950, fixed before release.
|
ClusterFuzz 12950, fixed before release.
|
||||||
|
|
||||||
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
31. Implemented PCRE2_EXTRA_ALT_BSUX to support ECMAScript 6's \u{hhh}
|
||||||
construct.
|
construct.
|
||||||
|
|
||||||
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
|
32. Compile \p{Any} to be the same as . in DOTALL mode, so that it benefits
|
||||||
|
@ -133,15 +133,15 @@ from auto-anchoring if \p{Any}* starts a pattern.
|
||||||
|
|
||||||
33. Compile invalid UTF check in JIT test when only pcre32 is enabled.
|
33. Compile invalid UTF check in JIT test when only pcre32 is enabled.
|
||||||
|
|
||||||
34. For some time now, CMake has been warning about the setting of policy
|
34. For some time now, CMake has been warning about the setting of policy
|
||||||
CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be
|
CMP0026 to "OLD" in CMakeLists.txt, and hinting that the feature might be
|
||||||
removed in a future version. A request for CMake expertise on the list produced
|
removed in a future version. A request for CMake expertise on the list produced
|
||||||
no result, so I have now hacked CMakeLists.txt along the lines of some changes
|
no result, so I have now hacked CMakeLists.txt along the lines of some changes
|
||||||
I found on the Internet. The new code no longer needs the policy setting, and
|
I found on the Internet. The new code no longer needs the policy setting, and
|
||||||
it appears to work fine on Linux.
|
it appears to work fine on Linux.
|
||||||
|
|
||||||
35. Setting --enable-jit=auto for an out-of-tree build failed because the
|
35. Setting --enable-jit=auto for an out-of-tree build failed because the
|
||||||
source directory wasn't always in the search path for AC_TRY_COMPILE. Patch
|
source directory wasn't always in the search path for AC_TRY_COMPILE. Patch
|
||||||
from Ross Burton.
|
from Ross Burton.
|
||||||
|
|
||||||
|
|
||||||
|
|
2
NEWS
2
NEWS
|
@ -5,7 +5,7 @@ News about PCRE2 releases
|
||||||
Version 10.33-RC1 03-March-2019
|
Version 10.33-RC1 03-March-2019
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
|
Yet more bugfixes, tidies, and a few enhancements, summarized here (see
|
||||||
ChangeLog for the full list):
|
ChangeLog for the full list):
|
||||||
|
|
||||||
1. Callouts from pcre2_substitute() are now available.
|
1. Callouts from pcre2_substitute() are now available.
|
||||||
|
|
|
@ -47,7 +47,7 @@ can skip ahead to the CMake section.
|
||||||
environment. In particular, you can alter the definition of the NEWLINE
|
environment. In particular, you can alter the definition of the NEWLINE
|
||||||
macro to specify what character(s) you want to be interpreted as line
|
macro to specify what character(s) you want to be interpreted as line
|
||||||
terminators by default.
|
terminators by default.
|
||||||
|
|
||||||
When you subsequently compile any of the PCRE2 modules, you must specify
|
When you subsequently compile any of the PCRE2 modules, you must specify
|
||||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||||
sources.
|
sources.
|
||||||
|
@ -61,7 +61,7 @@ can skip ahead to the CMake section.
|
||||||
configure/make world, this is handled automatically.) When upgrading to a
|
configure/make world, this is handled automatically.) When upgrading to a
|
||||||
new release, you are strongly advised to review src/config.h.generic
|
new release, you are strongly advised to review src/config.h.generic
|
||||||
before re-using what you had previously.
|
before re-using what you had previously.
|
||||||
|
|
||||||
Note also that the src/config.h.generic file is created from a config.h
|
Note also that the src/config.h.generic file is created from a config.h
|
||||||
that was generated by Autotools, which automatically includes settings of
|
that was generated by Autotools, which automatically includes settings of
|
||||||
a number of macros that are not actually used by PCRE2 (for example,
|
a number of macros that are not actually used by PCRE2 (for example,
|
||||||
|
@ -109,7 +109,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_newline.c
|
pcre2_newline.c
|
||||||
pcre2_ord2utf.c
|
pcre2_ord2utf.c
|
||||||
pcre2_pattern_info.c
|
pcre2_pattern_info.c
|
||||||
pcre2_script_run.c
|
pcre2_script_run.c
|
||||||
pcre2_serialize.c
|
pcre2_serialize.c
|
||||||
pcre2_string_utils.c
|
pcre2_string_utils.c
|
||||||
pcre2_study.c
|
pcre2_study.c
|
||||||
|
|
24
README
24
README
|
@ -53,7 +53,7 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||||
renamed or pointed at by a link (or the program modified, of course). See the
|
renamed or pointed at by a link (or the program modified, of course). See the
|
||||||
pcre2posix documentation for more details.
|
pcre2posix documentation for more details.
|
||||||
|
|
||||||
|
|
||||||
|
@ -311,10 +311,10 @@ library. They are also documented in the pcre2build man page.
|
||||||
. There is support for calling external programs during matching in the
|
. There is support for calling external programs during matching in the
|
||||||
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
||||||
support can be disabled by adding --disable-pcre2grep-callout to the
|
support can be disabled by adding --disable-pcre2grep-callout to the
|
||||||
"configure" command. There are two kinds of callout: one that generates
|
"configure" command. There are two kinds of callout: one that generates
|
||||||
output from inbuilt code, and another that calls an external program. The
|
output from inbuilt code, and another that calls an external program. The
|
||||||
latter has special support for Windows and VMS; otherwise it assumes the
|
latter has special support for Windows and VMS; otherwise it assumes the
|
||||||
existence of the fork() function. This facility can be disabled by adding
|
existence of the fork() function. This facility can be disabled by adding
|
||||||
--disable-pcre2grep-callout-fork to the "configure" command.
|
--disable-pcre2grep-callout-fork to the "configure" command.
|
||||||
|
|
||||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||||
|
@ -344,7 +344,7 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||||
whichever is the larger.
|
whichever is the larger.
|
||||||
|
|
||||||
. It is possible to compile pcre2test so that it links with the libreadline
|
. It is possible to compile pcre2test so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
|
|
||||||
|
@ -367,14 +367,14 @@ library. They are also documented in the pcre2build man page.
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
|
||||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||||
However, there is at least one environment that claims to be C99 but does not
|
However, there is at least one environment that claims to be C99 but does not
|
||||||
support these modifiers. If --disable-percent-zt is specified, no use is made
|
support these modifiers. If --disable-percent-zt is specified, no use is made
|
||||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||||
size_t values.
|
size_t values.
|
||||||
|
|
||||||
. There is a special option called --enable-fuzz-support for use by people who
|
. There is a special option called --enable-fuzz-support for use by people who
|
||||||
|
@ -790,7 +790,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_newline.c )
|
src/pcre2_newline.c )
|
||||||
src/pcre2_ord2utf.c )
|
src/pcre2_ord2utf.c )
|
||||||
src/pcre2_pattern_info.c )
|
src/pcre2_pattern_info.c )
|
||||||
src/pcre2_script_run.c )
|
src/pcre2_script_run.c )
|
||||||
src/pcre2_serialize.c )
|
src/pcre2_serialize.c )
|
||||||
src/pcre2_string_utils.c )
|
src/pcre2_string_utils.c )
|
||||||
src/pcre2_study.c )
|
src/pcre2_study.c )
|
||||||
|
|
|
@ -753,7 +753,7 @@ if $valgrind $vjs $pcre2grep --help | $valgrind $vjs $pcre2grep -q 'callout scri
|
||||||
else
|
else
|
||||||
$cf $srcdir/testdata/grepoutputC testtrygrep
|
$cf $srcdir/testdata/grepoutputC testtrygrep
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $? != 0 ] ; then exit 1; fi
|
if [ $? != 0 ] ; then exit 1; fi
|
||||||
else
|
else
|
||||||
echo "Script callouts are not supported"
|
echo "Script callouts are not supported"
|
||||||
|
|
14
configure.ac
14
configure.ac
|
@ -147,14 +147,14 @@ AC_ARG_ENABLE(jit,
|
||||||
if test "$enable_jit" = "auto"; then
|
if test "$enable_jit" = "auto"; then
|
||||||
AC_LANG(C)
|
AC_LANG(C)
|
||||||
SAVE_CPPFLAGS=$CPPFLAGS
|
SAVE_CPPFLAGS=$CPPFLAGS
|
||||||
CPPFLAGS=-I$srcdir
|
CPPFLAGS=-I$srcdir
|
||||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||||
#define SLJIT_CONFIG_AUTO 1
|
#define SLJIT_CONFIG_AUTO 1
|
||||||
#include "src/sljit/sljitConfigInternal.h"
|
#include "src/sljit/sljitConfigInternal.h"
|
||||||
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
|
||||||
#error unsupported
|
#error unsupported
|
||||||
#endif]])], enable_jit=yes, enable_jit=no)
|
#endif]])], enable_jit=yes, enable_jit=no)
|
||||||
CPPFLAGS=$SAVE_CPPFLAGS
|
CPPFLAGS=$SAVE_CPPFLAGS
|
||||||
echo checking for JIT support on this hardware... $enable_jit
|
echo checking for JIT support on this hardware... $enable_jit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -607,7 +607,7 @@ if test "$enable_percent_zt" = "no"; then
|
||||||
Define to any value to disable the use of the z and t modifiers in
|
Define to any value to disable the use of the z and t modifiers in
|
||||||
formatting settings such as %zu or %td (this is rarely needed).])
|
formatting settings such as %zu or %td (this is rarely needed).])
|
||||||
else
|
else
|
||||||
enable_percent_zt=auto
|
enable_percent_zt=auto
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Unless running under Windows, JIT support requires pthreads.
|
# Unless running under Windows, JIT support requires pthreads.
|
||||||
|
@ -647,13 +647,13 @@ if test "$enable_pcre2grep_callout" = "yes"; then
|
||||||
fi
|
fi
|
||||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT_FORK], [], [
|
||||||
Define to any value to enable fork support in pcre2grep callout scripts.
|
Define to any value to enable fork support in pcre2grep callout scripts.
|
||||||
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
This will have no effect unless SUPPORT_PCRE2GREP_CALLOUT is also
|
||||||
defined.])
|
defined.])
|
||||||
fi
|
fi
|
||||||
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
|
||||||
Define to any value to enable callout script support in pcre2grep.])
|
Define to any value to enable callout script support in pcre2grep.])
|
||||||
else
|
else
|
||||||
enable_pcre2grep_callout_fork="no"
|
enable_pcre2grep_callout_fork="no"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if test "$enable_unicode" = "yes"; then
|
if test "$enable_unicode" = "yes"; then
|
||||||
|
@ -1055,7 +1055,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
Build static libs .................. : ${enable_static}
|
Build static libs .................. : ${enable_static}
|
||||||
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
Use JIT in pcre2grep ............... : ${enable_pcre2grep_jit}
|
||||||
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
Enable callouts in pcre2grep ....... : ${enable_pcre2grep_callout}
|
||||||
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
Enable fork in pcre2grep callouts .. : ${enable_pcre2grep_callout_fork}
|
||||||
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
Initial buffer size for pcre2grep .. : ${with_pcre2grep_bufsize}
|
||||||
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
Maximum buffer size for pcre2grep .. : ${with_pcre2grep_max_bufsize}
|
||||||
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
Link pcre2grep with libz ........... : ${enable_pcre2grep_libz}
|
||||||
|
|
|
@ -47,7 +47,7 @@ can skip ahead to the CMake section.
|
||||||
environment. In particular, you can alter the definition of the NEWLINE
|
environment. In particular, you can alter the definition of the NEWLINE
|
||||||
macro to specify what character(s) you want to be interpreted as line
|
macro to specify what character(s) you want to be interpreted as line
|
||||||
terminators by default.
|
terminators by default.
|
||||||
|
|
||||||
When you subsequently compile any of the PCRE2 modules, you must specify
|
When you subsequently compile any of the PCRE2 modules, you must specify
|
||||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||||
sources.
|
sources.
|
||||||
|
@ -61,7 +61,7 @@ can skip ahead to the CMake section.
|
||||||
configure/make world, this is handled automatically.) When upgrading to a
|
configure/make world, this is handled automatically.) When upgrading to a
|
||||||
new release, you are strongly advised to review src/config.h.generic
|
new release, you are strongly advised to review src/config.h.generic
|
||||||
before re-using what you had previously.
|
before re-using what you had previously.
|
||||||
|
|
||||||
Note also that the src/config.h.generic file is created from a config.h
|
Note also that the src/config.h.generic file is created from a config.h
|
||||||
that was generated by Autotools, which automatically includes settings of
|
that was generated by Autotools, which automatically includes settings of
|
||||||
a number of macros that are not actually used by PCRE2 (for example,
|
a number of macros that are not actually used by PCRE2 (for example,
|
||||||
|
@ -109,7 +109,7 @@ can skip ahead to the CMake section.
|
||||||
pcre2_newline.c
|
pcre2_newline.c
|
||||||
pcre2_ord2utf.c
|
pcre2_ord2utf.c
|
||||||
pcre2_pattern_info.c
|
pcre2_pattern_info.c
|
||||||
pcre2_script_run.c
|
pcre2_script_run.c
|
||||||
pcre2_serialize.c
|
pcre2_serialize.c
|
||||||
pcre2_string_utils.c
|
pcre2_string_utils.c
|
||||||
pcre2_study.c
|
pcre2_study.c
|
||||||
|
|
|
@ -53,7 +53,7 @@ The header file for the POSIX-style functions is called pcre2posix.h. The
|
||||||
official POSIX name is regex.h, but I did not want to risk possible problems
|
official POSIX name is regex.h, but I did not want to risk possible problems
|
||||||
with existing files of that name by distributing it that way. To use PCRE2 with
|
with existing files of that name by distributing it that way. To use PCRE2 with
|
||||||
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
an existing program that uses the POSIX API, pcre2posix.h will have to be
|
||||||
renamed or pointed at by a link (or the program modified, of course). See the
|
renamed or pointed at by a link (or the program modified, of course). See the
|
||||||
pcre2posix documentation for more details.
|
pcre2posix documentation for more details.
|
||||||
|
|
||||||
|
|
||||||
|
@ -311,7 +311,11 @@ library. They are also documented in the pcre2build man page.
|
||||||
. There is support for calling external programs during matching in the
|
. There is support for calling external programs during matching in the
|
||||||
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
pcre2grep command, using PCRE2's callout facility with string arguments. This
|
||||||
support can be disabled by adding --disable-pcre2grep-callout to the
|
support can be disabled by adding --disable-pcre2grep-callout to the
|
||||||
"configure" command.
|
"configure" command. There are two kinds of callout: one that generates
|
||||||
|
output from inbuilt code, and another that calls an external program. The
|
||||||
|
latter has special support for Windows and VMS; otherwise it assumes the
|
||||||
|
existence of the fork() function. This facility can be disabled by adding
|
||||||
|
--disable-pcre2grep-callout-fork to the "configure" command.
|
||||||
|
|
||||||
. The pcre2grep program currently supports only 8-bit data files, and so
|
. The pcre2grep program currently supports only 8-bit data files, and so
|
||||||
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
requires the 8-bit PCRE2 library. It is possible to compile pcre2grep to use
|
||||||
|
@ -363,14 +367,14 @@ library. They are also documented in the pcre2build man page.
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
|
||||||
. The C99 standard defines formatting modifiers z and t for size_t and
|
. The C99 standard defines formatting modifiers z and t for size_t and
|
||||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||||
However, there is at least one environment that claims to be C99 but does not
|
However, there is at least one environment that claims to be C99 but does not
|
||||||
support these modifiers. If --disable-percent-zt is specified, no use is made
|
support these modifiers. If --disable-percent-zt is specified, no use is made
|
||||||
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
of the z or t modifiers. Instead of %td or %zu, %lu is used, with a cast for
|
||||||
size_t values.
|
size_t values.
|
||||||
|
|
||||||
. There is a special option called --enable-fuzz-support for use by people who
|
. There is a special option called --enable-fuzz-support for use by people who
|
||||||
|
@ -786,7 +790,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_newline.c )
|
src/pcre2_newline.c )
|
||||||
src/pcre2_ord2utf.c )
|
src/pcre2_ord2utf.c )
|
||||||
src/pcre2_pattern_info.c )
|
src/pcre2_pattern_info.c )
|
||||||
src/pcre2_script_run.c )
|
src/pcre2_script_run.c )
|
||||||
src/pcre2_serialize.c )
|
src/pcre2_serialize.c )
|
||||||
src/pcre2_string_utils.c )
|
src/pcre2_string_utils.c )
|
||||||
src/pcre2_study.c )
|
src/pcre2_study.c )
|
||||||
|
@ -886,4 +890,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 29 January 2019
|
Last updated: 03 March 2019
|
||||||
|
|
|
@ -52,7 +52,7 @@ characters. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
On success, make a private subject copy
|
On success, make a private subject copy
|
||||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
|
|
|
@ -61,7 +61,7 @@ terminated by a binary zero code unit. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
On success, make a private subject copy
|
On success, make a private subject copy
|
||||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject string is not the end of a line
|
PCRE2_NOTEOL Subject string is not the end of a line
|
||||||
|
|
|
@ -31,7 +31,7 @@ using the memory freeing function from the general context or compiled pattern
|
||||||
with which it was created, or <b>free()</b> if that was not set.
|
with which it was created, or <b>free()</b> if that was not set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
||||||
match data block, the copy of the subject that was remembered with the block is
|
match data block, the copy of the subject that was remembered with the block is
|
||||||
also freed.
|
also freed.
|
||||||
</P>
|
</P>
|
||||||
|
|
|
@ -31,7 +31,7 @@ housed in a compile context. It completely replaces all the bits. The extra
|
||||||
options are:
|
options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
|
||||||
PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
|
PCRE2_EXTRA_ALT_BSUX Extended alternate \u, \U, and \x handling
|
||||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character
|
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as a literal following character
|
||||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
|
PCRE2_EXTRA_ESCAPED_CR_IS_LF Interpret \r as \n
|
||||||
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
|
PCRE2_EXTRA_MATCH_LINE Pattern matches whole lines
|
||||||
|
|
|
@ -1309,7 +1309,7 @@ be referenced by the substring extraction functions after a successful match.
|
||||||
After running a match, you must not free a compiled pattern or a subject string
|
After running a match, you must not free a compiled pattern or a subject string
|
||||||
until after all operations on the
|
until after all operations on the
|
||||||
<a href="#matchdatablock">match data block</a>
|
<a href="#matchdatablock">match data block</a>
|
||||||
have taken place, unless, in the case of the subject string, you have used the
|
have taken place, unless, in the case of the subject string, you have used the
|
||||||
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
||||||
"Option bits for <b>pcre2_match()</b>"
|
"Option bits for <b>pcre2_match()</b>"
|
||||||
<a href="#matchoptions">below.</a>
|
<a href="#matchoptions">below.</a>
|
||||||
|
@ -1437,8 +1437,8 @@ binary zero character followed by z).
|
||||||
ECMAscript 6 added additional functionality to \u. This can be accessed using
|
ECMAscript 6 added additional functionality to \u. This can be accessed using
|
||||||
the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
|
the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
|
||||||
<a href="#extracompileoptions">below).</a>
|
<a href="#extracompileoptions">below).</a>
|
||||||
Note that this alternative escape handling applies only to patterns. Neither of
|
Note that this alternative escape handling applies only to patterns. Neither of
|
||||||
these options affects the processing of replacement strings passed to
|
these options affects the processing of replacement strings passed to
|
||||||
<b>pcre2_substitute()</b>.
|
<b>pcre2_substitute()</b>.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ALT_CIRCUMFLEX
|
PCRE2_ALT_CIRCUMFLEX
|
||||||
|
@ -1875,10 +1875,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTRA_ALT_BSUX
|
PCRE2_EXTRA_ALT_BSUX
|
||||||
</pre>
|
</pre>
|
||||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in
|
The original option PCRE2_ALT_BSUX causes PCRE2 to process \U, \u, and \x in
|
||||||
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
the way that ECMAscript (aka JavaScript) does. Additional functionality was
|
||||||
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
defined by ECMAscript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||||
PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
|
PCRE2_ALT_BSUX, but in addition it recognizes \u{hhh..} as a hexadecimal
|
||||||
character code, where hhh.. is any number of hexadecimal digits.
|
character code, where hhh.. is any number of hexadecimal digits.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
||||||
|
@ -1896,7 +1896,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
|
||||||
<b>pcre2_compile()</b>, all unrecognized or malformed escape sequences are
|
<b>pcre2_compile()</b>, all unrecognized or malformed escape sequences are
|
||||||
treated as single-character escapes. For example, \j is a literal "j" and
|
treated as single-character escapes. For example, \j is a literal "j" and
|
||||||
\x{2z} is treated as the literal string "x{2z}". Setting this option means
|
\x{2z} is treated as the literal string "x{2z}". Setting this option means
|
||||||
that typos in patterns may go undetected and have unexpected results. Also note
|
that typos in patterns may go undetected and have unexpected results. Also note
|
||||||
that a sequence such as [\N{] is interpreted as a malformed attempt at
|
that a sequence such as [\N{] is interpreted as a malformed attempt at
|
||||||
[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an
|
[\N{...}] and so is treated as [N{] whereas [\N] gives an error because an
|
||||||
unqualified \N is a valid escape sequence but is not supported in a character
|
unqualified \N is a valid escape sequence but is not supported in a character
|
||||||
|
@ -1904,9 +1904,9 @@ class. To reiterate: this is a dangerous option. Use with great care.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||||
</pre>
|
</pre>
|
||||||
There are some legacy applications where the escape sequence \r in a pattern
|
There are some legacy applications where the escape sequence \r in a pattern
|
||||||
is expected to match a newline. If this option is set, \r in a pattern is
|
is expected to match a newline. If this option is set, \r in a pattern is
|
||||||
converted to \n so that it matches a LF (linefeed) instead of a CR (carriage
|
converted to \n so that it matches a LF (linefeed) instead of a CR (carriage
|
||||||
return) character. The option does not affect a literal CR in the pattern, nor
|
return) character. The option does not affect a literal CR in the pattern, nor
|
||||||
does it affect CR specified as an explicit code point such as \x{0D}.
|
does it affect CR specified as an explicit code point such as \x{0D}.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -2564,7 +2564,7 @@ Option bits for <b>pcre2_match()</b>
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
The unused bits of the <i>options</i> argument for <b>pcre2_match()</b> must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||||
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
||||||
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
||||||
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
||||||
|
@ -2585,8 +2585,8 @@ matching.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
</pre>
|
</pre>
|
||||||
By default, a pointer to the subject is remembered in the match data block so
|
By default, a pointer to the subject is remembered in the match data block so
|
||||||
that, after a successful match, it can be referenced by the substring
|
that, after a successful match, it can be referenced by the substring
|
||||||
extraction functions. This means that the subject's memory must not be freed
|
extraction functions. This means that the subject's memory must not be freed
|
||||||
until all such operations are complete. For some applications where the
|
until all such operations are complete. For some applications where the
|
||||||
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
||||||
|
@ -2866,8 +2866,8 @@ undefined.
|
||||||
<P>
|
<P>
|
||||||
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
||||||
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
||||||
<b>pcre2_get_mark()</b> can be called to access this name, which can be
|
<b>pcre2_get_mark()</b> can be called to access this name, which can be
|
||||||
specified in the pattern by any of the backtracking control verbs, not just
|
specified in the pattern by any of the backtracking control verbs, not just
|
||||||
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
||||||
the zero-terminated name, which is within the compiled pattern. If no name is
|
the zero-terminated name, which is within the compiled pattern. If no name is
|
||||||
available, NULL is returned. The length of the name (excluding the terminating
|
available, NULL is returned. The length of the name (excluding the terminating
|
||||||
|
@ -3002,7 +3002,7 @@ The backtracking match limit was reached.
|
||||||
If a pattern contains many nested backtracking points, heap memory is used to
|
If a pattern contains many nested backtracking points, heap memory is used to
|
||||||
remember them. This error is given when the memory allocation function (default
|
remember them. This error is given when the memory allocation function (default
|
||||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||||
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
|
@ -3405,7 +3405,7 @@ capture groups and letters within \Q...\E quoted sequences.
|
||||||
<P>
|
<P>
|
||||||
Note that case forcing sequences such as \U...\E do not nest. For example,
|
Note that case forcing sequences such as \U...\E do not nest. For example,
|
||||||
the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
|
the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final \E has no
|
||||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||||
not apply to replacement strings.
|
not apply to replacement strings.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -3439,7 +3439,7 @@ substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
|
||||||
groups in the extended syntax forms to be treated as unset.
|
groups in the extended syntax forms to be treated as unset.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If successful, <b>pcre2_substitute()</b> returns the number of successful
|
If successful, <b>pcre2_substitute()</b> returns the number of successful
|
||||||
matches. This may be zero if no matches were found, and is never greater than 1
|
matches. This may be zero if no matches were found, and is never greater than 1
|
||||||
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
</P>
|
</P>
|
||||||
|
@ -3489,8 +3489,8 @@ Substitution callouts
|
||||||
<br>
|
<br>
|
||||||
The <b>pcre2_set_substitution_callout()</b> function can be used to specify a
|
The <b>pcre2_set_substitution_callout()</b> function can be used to specify a
|
||||||
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
callout function for <b>pcre2_substitute()</b>. This information is passed in
|
||||||
a match context. The callout function is called after each substitution has
|
a match context. The callout function is called after each substitution has
|
||||||
been processed, but it can cause the replacement not to happen. The callout
|
been processed, but it can cause the replacement not to happen. The callout
|
||||||
function is not called for simulated substitutions that happen as a result of
|
function is not called for simulated substitutions that happen as a result of
|
||||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||||
</P>
|
</P>
|
||||||
|
@ -3500,10 +3500,10 @@ block structure, which contains the following fields, not necessarily in this
|
||||||
order:
|
order:
|
||||||
<pre>
|
<pre>
|
||||||
uint32_t <i>version</i>;
|
uint32_t <i>version</i>;
|
||||||
uint32_t <i>subscount</i>;
|
uint32_t <i>subscount</i>;
|
||||||
PCRE2_SPTR <i>input</i>;
|
PCRE2_SPTR <i>input</i>;
|
||||||
PCRE2_SPTR <i>output</i>;
|
PCRE2_SPTR <i>output</i>;
|
||||||
PCRE2_SIZE <i>*ovector</i>;
|
PCRE2_SIZE <i>*ovector</i>;
|
||||||
uint32_t <i>oveccount</i>;
|
uint32_t <i>oveccount</i>;
|
||||||
PCRE2_SIZE <i>output_offsets[2]</i>;
|
PCRE2_SIZE <i>output_offsets[2]</i>;
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -3517,9 +3517,9 @@ first callout, 2 for the second, and so on. The <i>input</i> and <i>output</i>
|
||||||
pointers are copies of the values passed to <b>pcre2_substitute()</b>.
|
pointers are copies of the values passed to <b>pcre2_substitute()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>ovector</i> field points to the ovector, which contains the result of the
|
The <i>ovector</i> field points to the ovector, which contains the result of the
|
||||||
most recent match. The <i>oveccount</i> field contains the number of pairs that
|
most recent match. The <i>oveccount</i> field contains the number of pairs that
|
||||||
are set in the ovector, and is always greater than zero.
|
are set in the ovector, and is always greater than zero.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>output_offsets</i> vector contains the offsets of the replacement in the
|
The <i>output_offsets</i> vector contains the offsets of the replacement in the
|
||||||
|
|
|
@ -376,12 +376,15 @@ environment.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
|
<br><a name="SEC14" href="#TOC1">PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
By default, on non-Windows systems, <b>pcre2grep</b> supports the use of
|
By default <b>pcre2grep</b> supports the use of callouts with string arguments
|
||||||
callouts with string arguments within the patterns it is matching, in order to
|
within the patterns it is matching. There are two kinds: one that generates
|
||||||
run external scripts. For details, see the
|
output using local code, and another that calls an external program or script.
|
||||||
|
If --disable-pcre2grep-callout-fork is added to the <b>configure</b> command,
|
||||||
|
only the first kind of callout is supported; if --disable-pcre2grep-callout is
|
||||||
|
used, all callouts are completely ignored. For more details of <b>pcre2grep</b>
|
||||||
|
callouts, see the
|
||||||
<a href="pcre2grep.html"><b>pcre2grep</b></a>
|
<a href="pcre2grep.html"><b>pcre2grep</b></a>
|
||||||
documentation. This support can be disabled by adding
|
documentation.
|
||||||
--disable-pcre2grep-callout to the <b>configure</b> command.
|
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
<br><a name="SEC15" href="#TOC1">PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -526,14 +529,14 @@ documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
|
<br><a name="SEC21" href="#TOC1">DISABLING THE Z AND T FORMATTING MODIFIERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
The C99 standard defines formatting modifiers z and t for size_t and
|
The C99 standard defines formatting modifiers z and t for size_t and
|
||||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers in
|
||||||
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
environments other than Microsoft Visual Studio when __STDC_VERSION__ is
|
||||||
defined and has a value greater than or equal to 199901L (indicating C99).
|
defined and has a value greater than or equal to 199901L (indicating C99).
|
||||||
However, there is at least one environment that claims to be C99 but does not
|
However, there is at least one environment that claims to be C99 but does not
|
||||||
support these modifiers. If
|
support these modifiers. If
|
||||||
<pre>
|
<pre>
|
||||||
--disable-percent-zt
|
--disable-percent-zt
|
||||||
</pre>
|
</pre>
|
||||||
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
is specified, no use is made of the z or t modifiers. Instead of %td or %zu,
|
||||||
%lu is used, with a cast for size_t values.
|
%lu is used, with a cast for size_t values.
|
||||||
|
@ -589,9 +592,9 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 15 November 2018
|
Last updated: 03 March 2019
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2018 University of Cambridge.
|
Copyright © 1997-2019 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -48,7 +48,7 @@ When using the <b>pcre2_substitute()</b> function, an additional callout feature
|
||||||
is available. This does a callout after each change to the subject string and
|
is available. This does a callout after each change to the subject string and
|
||||||
is described in the
|
is described in the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation; the rest of this document is concerned with callouts during
|
documentation; the rest of this document is concerned with callouts during
|
||||||
pattern matching.
|
pattern matching.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
|
|
@ -871,8 +871,8 @@ only callouts with string arguments are useful.
|
||||||
Calling external programs or scripts
|
Calling external programs or scripts
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
This facility can be independently disabled when <b>pcre2grep</b> is built. It
|
This facility can be independently disabled when <b>pcre2grep</b> is built. It
|
||||||
is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
|
is supported for Windows, where a call to <b>_spawnvp()</b> is used, for VMS,
|
||||||
where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
|
where <b>lib$spawn()</b> is used, and for any other Unix-like environment where
|
||||||
<b>fork()</b> and <b>execv()</b> are available.
|
<b>fork()</b> and <b>execv()</b> are available.
|
||||||
</P>
|
</P>
|
||||||
|
|
|
@ -418,13 +418,13 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \x followed
|
||||||
by { is not recognized. Only if \x is followed by two hexadecimal digits is it
|
by { is not recognized. Only if \x is followed by two hexadecimal digits is it
|
||||||
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
||||||
character. In this mode, support for code points greater than 256 is provided
|
character. In this mode, support for code points greater than 256 is provided
|
||||||
by \u, which must be followed by four hexadecimal digits; otherwise it is
|
by \u, which must be followed by four hexadecimal digits; otherwise it is
|
||||||
interpreted as a literal "u" character.
|
interpreted as a literal "u" character.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
||||||
\u{hhh..} is recognized as the character specified by hexadecimal code point.
|
\u{hhh..} is recognized as the character specified by hexadecimal code point.
|
||||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||||
6.
|
6.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1194,7 +1194,7 @@ character. If any other of these assertions appears in a character class, an
|
||||||
A word boundary is a position in the subject string where the current character
|
A word boundary is a position in the subject string where the current character
|
||||||
and the previous character do not both match \w or \W (i.e. one matches
|
and the previous character do not both match \w or \W (i.e. one matches
|
||||||
\w and the other matches \W), or the start or end of the string if the
|
\w and the other matches \W), or the start or end of the string if the
|
||||||
first or last character matches \w, respectively. When PCRE2 is built with
|
first or last character matches \w, respectively. When PCRE2 is built with
|
||||||
Unicode support, the meanings of \w and \W can be changed by setting the
|
Unicode support, the meanings of \w and \W can be changed by setting the
|
||||||
PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2
|
PCRE2_UCP option. When this is done, it also affects \b and \B. Neither PCRE2
|
||||||
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
||||||
|
|
|
@ -50,13 +50,13 @@ expression 8-bit library. There are no POSIX-style wrappers for PCRE2's 16-bit
|
||||||
and 32-bit libraries. See the
|
and 32-bit libraries. See the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation for a description of PCRE2's native API, which contains much
|
documentation for a description of PCRE2's native API, which contains much
|
||||||
additional functionality.
|
additional functionality.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The functions described here are wrapper functions that ultimately call the
|
The functions described here are wrapper functions that ultimately call the
|
||||||
PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
|
PCRE2 native API. Their prototypes are defined in the <b>pcre2posix.h</b> header
|
||||||
file, and they all have unique names starting with <b>pcre2_</b>. However, the
|
file, and they all have unique names starting with <b>pcre2_</b>. However, the
|
||||||
<b>pcre2posix.h</b> header also contains macro definitions that convert the
|
<b>pcre2posix.h</b> header also contains macro definitions that convert the
|
||||||
standard POSIX names such as <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
|
standard POSIX names such as <b>regcomp()</b> into <b>pcre2_regcomp()</b> etc. This
|
||||||
means that a program can use the usual POSIX names without running the risk of
|
means that a program can use the usual POSIX names without running the risk of
|
||||||
accidentally linking with POSIX functions from a different library.
|
accidentally linking with POSIX functions from a different library.
|
||||||
|
@ -68,7 +68,7 @@ application. Because the POSIX functions call the native ones, it is also
|
||||||
necessary to add <b>-lpcre2-8</b>.
|
necessary to add <b>-lpcre2-8</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Although they are not defined as prototypes in <b>pcre2posix.h</b>, the library
|
Although they are not defined as prototypes in <b>pcre2posix.h</b>, the library
|
||||||
does contain functions with the POSIX names <b>regcomp()</b> etc. These simply
|
does contain functions with the POSIX names <b>regcomp()</b> etc. These simply
|
||||||
pass their arguments to the PCRE2 functions. These functions are provided for
|
pass their arguments to the PCRE2 functions. These functions are provided for
|
||||||
backwards compatibility with earlier versions of PCRE2, so that existing
|
backwards compatibility with earlier versions of PCRE2, so that existing
|
||||||
|
|
|
@ -58,7 +58,7 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
|
<br><a name="SEC3" href="#TOC1">ESCAPED CHARACTERS</a><br>
|
||||||
<P>
|
<P>
|
||||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||||
sequence causes an error.
|
sequence causes an error.
|
||||||
<pre>
|
<pre>
|
||||||
\a alarm, that is, the BEL character (hex 07)
|
\a alarm, that is, the BEL character (hex 07)
|
||||||
|
@ -85,7 +85,7 @@ following are also recognized:
|
||||||
When \x is not followed by {, from zero to two hexadecimal digits are read,
|
When \x is not followed by {, from zero to two hexadecimal digits are read,
|
||||||
but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
|
but in ALT_BSUX mode \x must be followed by two hexadecimal digits to be
|
||||||
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
||||||
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
Likewise, if \u (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||||
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
||||||
matches a literal "u".
|
matches a literal "u".
|
||||||
</P>
|
</P>
|
||||||
|
|
|
@ -606,10 +606,10 @@ for a description of the effects of these options.
|
||||||
/s dotall set PCRE2_DOTALL
|
/s dotall set PCRE2_DOTALL
|
||||||
dupnames set PCRE2_DUPNAMES
|
dupnames set PCRE2_DUPNAMES
|
||||||
endanchored set PCRE2_ENDANCHORED
|
endanchored set PCRE2_ENDANCHORED
|
||||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||||
/x extended set PCRE2_EXTENDED
|
/x extended set PCRE2_EXTENDED
|
||||||
/xx extended_more set PCRE2_EXTENDED_MORE
|
/xx extended_more set PCRE2_EXTENDED_MORE
|
||||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||||
firstline set PCRE2_FIRSTLINE
|
firstline set PCRE2_FIRSTLINE
|
||||||
literal set PCRE2_LITERAL
|
literal set PCRE2_LITERAL
|
||||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||||
|
@ -1043,7 +1043,7 @@ process.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allvector show the entire ovector
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1051,9 +1051,9 @@ process.
|
||||||
mark show mark values
|
mark show mark values
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
substitute_callout use substitution callouts
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_skip=<n> skip substitution number n
|
substitute_skip=<n> skip substitution number n
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_stop=<n> skip substitution number n and greater
|
substitute_stop=<n> skip substitution number n and greater
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1191,7 +1191,7 @@ pattern.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allvector show the entire ovector
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text (non-JIT only)
|
allusedtext show all consulted text (non-JIT only)
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
|
@ -1221,9 +1221,9 @@ pattern.
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
startoffset=<n> same as offset=<n>
|
startoffset=<n> same as offset=<n>
|
||||||
substitute_callout use substitution callouts
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_skip=<n> skip substitution number n
|
substitute_skip=<n> skip substitution number n
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_stop=<n> skip substitution number n and greater
|
substitute_stop=<n> skip substitution number n and greater
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1306,9 +1306,9 @@ result, and also for DFA matching, provides a means of checking that there are
|
||||||
no unexpected modifications to ovector fields. Before each match attempt, the
|
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||||
ovector is filled with a special value, and if this is found in both elements
|
ovector is filled with a special value, and if this is found in both elements
|
||||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||||
applies to all groups after the maximum capture group for the pattern. In other
|
applies to all groups after the maximum capture group for the pattern. In other
|
||||||
cases it applies to the entire ovector. After a partial match, the first two
|
cases it applies to the entire ovector. After a partial match, the first two
|
||||||
elements are the only ones that should be set. After a DFA match, the amount of
|
elements are the only ones that should be set. After a DFA match, the amount of
|
||||||
ovector that is used depends on the number of matches that were found.
|
ovector that is used depends on the number of matches that were found.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -1320,7 +1320,7 @@ functions, unless <b>callout_none</b> is specified. Its behaviour can be
|
||||||
controlled by various modifiers listed above whose names begin with
|
controlled by various modifiers listed above whose names begin with
|
||||||
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
<b>callout_</b>. Details are given in the section entitled "Callouts"
|
||||||
<a href="#callouts">below.</a>
|
<a href="#callouts">below.</a>
|
||||||
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
Testing callouts from <b>pcre2_substitute()</b> is described separately in
|
||||||
"Testing the substitution function"
|
"Testing the substitution function"
|
||||||
<a href="#substitution">below.</a>
|
<a href="#substitution">below.</a>
|
||||||
</P>
|
</P>
|
||||||
|
@ -1449,14 +1449,14 @@ matching provokes an error return ("bad option value") from
|
||||||
Testing substitute callouts
|
Testing substitute callouts
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
If the <b>substitute_callout</b> modifier is set, a substitution callout
|
||||||
function is set up. When it is called (after each substitution), details of the
|
function is set up. When it is called (after each substitution), details of the
|
||||||
input and output strings are output. For example:
|
input and output strings are output. For example:
|
||||||
<pre>
|
<pre>
|
||||||
/abc/g,replace=<$0>,substitute_callout
|
/abc/g,replace=<$0>,substitute_callout
|
||||||
abcdefabcpqr
|
abcdefabcpqr
|
||||||
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
||||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||||
2: <abc>def<abc>pqr
|
2: <abc>def<abc>pqr
|
||||||
</pre>
|
</pre>
|
||||||
The first number on each callout line is the count of matches. The
|
The first number on each callout line is the count of matches. The
|
||||||
|
@ -1466,11 +1466,11 @@ listed the offsets of the old substring, its contents, and the same for the
|
||||||
replacement.
|
replacement.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
By default, the substitution callout function returns zero, which accepts the
|
By default, the substitution callout function returns zero, which accepts the
|
||||||
replacement and causes matching to continue if /g was used. Two further
|
replacement and causes matching to continue if /g was used. Two further
|
||||||
modifiers can be used to test other return values. If <b>substitute_skip</b> is
|
modifiers can be used to test other return values. If <b>substitute_skip</b> is
|
||||||
set to a value greater than zero the callout function returns +1 for the match
|
set to a value greater than zero the callout function returns +1 for the match
|
||||||
of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
|
of that number, and similarly <b>substitute_stop</b> returns -1. These cause the
|
||||||
replacement to be rejected, and -1 causes no further matching to take place. If
|
replacement to be rejected, and -1 causes no further matching to take place. If
|
||||||
either of them is set, <b>substitute_callout</b> is assumed. For example:
|
either of them is set, <b>substitute_callout</b> is assumed. For example:
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -1483,7 +1483,7 @@ either of them are set, <b>substitute_callout</b> is assumed. For example:
|
||||||
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
||||||
1: abcdefabcpqr
|
1: abcdefabcpqr
|
||||||
</pre>
|
</pre>
|
||||||
If both are set for the same number, stop takes precedence. Only a single skip
|
If both are set for the same number, stop takes precedence. Only a single skip
|
||||||
or stop is supported, which is sufficient for testing that the feature works.
|
or stop is supported, which is sufficient for testing that the feature works.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
|
|
@ -82,7 +82,7 @@ The escape sequence \C can be used to match a single code unit in a UTF mode,
|
||||||
but its use can lead to some strange effects because it breaks up multi-unit
|
but its use can lead to some strange effects because it breaks up multi-unit
|
||||||
characters (see the description of \C in the
|
characters (see the description of \C in the
|
||||||
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
<a href="pcre2pattern.html"><b>pcre2pattern</b></a>
|
||||||
documentation). For this reason, there is a build-time option that disables
|
documentation). For this reason, there is a build-time option that disables
|
||||||
support for \C completely. There is also a less draconian compile-time option
|
support for \C completely. There is also a less draconian compile-time option
|
||||||
for locking out the use of \C when a pattern is compiled.
|
for locking out the use of \C when a pattern is compiled.
|
||||||
</P>
|
</P>
|
||||||
|
@ -144,14 +144,14 @@ scripts are commonly used together, and because some diacritical and other
|
||||||
marks are used with multiple scripts, it is not that simple.
|
marks are used with multiple scripts, it is not that simple.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Every Unicode character has a Script property, mostly with a value
|
Every Unicode character has a Script property, mostly with a value
|
||||||
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
||||||
are also three special values:
|
are also three special values:
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
"Unknown" is used for code points that have not been assigned, and also for the
|
"Unknown" is used for code points that have not been assigned, and also for the
|
||||||
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
||||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||||
only in non-UTF mode, are assigned the Unknown script.
|
only in non-UTF mode, are assigned the Unknown script.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -165,20 +165,20 @@ previous character. These are considered to take on the script of the character
|
||||||
that they modify.
|
that they modify.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Some Inherited characters are used with many scripts, but many of them are only
|
Some Inherited characters are used with many scripts, but many of them are only
|
||||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||||
possible to check this, a Unicode property called Script Extension exists. Its
|
possible to check this, a Unicode property called Script Extension exists. Its
|
||||||
value is a list of scripts that apply to the character. For the majority of
|
value is a list of scripts that apply to the character. For the majority of
|
||||||
characters, the list contains just one script, the same one as the Script
|
characters, the list contains just one script, the same one as the Script
|
||||||
property. However, for characters such as U+102E0 more than one Script is
|
property. However, for characters such as U+102E0 more than one Script is
|
||||||
listed. There are also some Common characters that have a single, non-Common
|
listed. There are also some Common characters that have a single, non-Common
|
||||||
script in their Script Extension list.
|
script in their Script Extension list.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The next section describes the basic rules for deciding whether a given string
|
The next section describes the basic rules for deciding whether a given string
|
||||||
of characters is a script run. Note, however, that there are some special cases
|
of characters is a script run. Note, however, that there are some special cases
|
||||||
involving the Chinese Han script, and an additional constraint for decimal
|
involving the Chinese Han script, and an additional constraint for decimal
|
||||||
digits. These are covered in subsequent sections.
|
digits. These are covered in subsequent sections.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -201,17 +201,17 @@ all the sets of scripts must not be empty.
|
||||||
<P>
|
<P>
|
||||||
A simple example is an Internet name such as "google.com". The letters are all
|
A simple example is an Internet name such as "google.com". The letters are all
|
||||||
in the Latin script, and the dot is Common, so this string is a script run.
|
in the Latin script, and the dot is Common, so this string is a script run.
|
||||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||||
string that looks the same, but with Cyrillic "o"s is not a script run.
|
string that looks the same, but with Cyrillic "o"s is not a script run.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
More interesting examples involve characters with more than one script in their
|
More interesting examples involve characters with more than one script in their
|
||||||
Script Extension. Consider the following characters:
|
Script Extension. Consider the following characters:
|
||||||
<pre>
|
<pre>
|
||||||
U+060C Arabic comma
|
U+060C Arabic comma
|
||||||
U+06D4 Arabic full stop
|
U+06D4 Arabic full stop
|
||||||
</pre>
|
</pre>
|
||||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||||
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
||||||
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
||||||
appear in Syriac or Thaana script runs, but the second could not.
|
appear in Syriac or Thaana script runs, but the second could not.
|
||||||
|
@ -220,8 +220,8 @@ appear in Syriac or Thaana script runs, but the second could not.
|
||||||
The Chinese Han script
|
The Chinese Han script
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||||
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
||||||
and Han. These three combinations are treated as special cases when checking
|
and Han. These three combinations are treated as special cases when checking
|
||||||
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
||||||
|
|
183
doc/pcre2.txt
183
doc/pcre2.txt
|
@ -180,8 +180,8 @@ REVISION
|
||||||
Last updated: 17 September 2018
|
Last updated: 17 September 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
PCRE2API(3) Library Functions Manual PCRE2API(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -3681,8 +3681,8 @@ REVISION
|
||||||
Last updated: 14 February 2019
|
Last updated: 14 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
PCRE2BUILD(3) Library Functions Manual PCRE2BUILD(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4027,45 +4027,48 @@ USING EBCDIC CODE
|
||||||
|
|
||||||
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
|
PCRE2GREP SUPPORT FOR EXTERNAL SCRIPTS
|
||||||
|
|
||||||
By default, on non-Windows systems, pcre2grep supports the use of call-
|
By default pcre2grep supports the use of callouts with string arguments
|
||||||
outs with string arguments within the patterns it is matching, in order
|
within the patterns it is matching. There are two kinds: one that gen-
|
||||||
to run external scripts. For details, see the pcre2grep documentation.
|
erates output using local code, and another that calls an external pro-
|
||||||
This support can be disabled by adding --disable-pcre2grep-callout to
|
gram or script. If --disable-pcre2grep-callout-fork is added to the
|
||||||
the configure command.
|
configure command, only the first kind of callout is supported; if
|
||||||
|
--disable-pcre2grep-callout is used, all callouts are completely
|
||||||
|
ignored. For more details of pcre2grep callouts, see the pcre2grep doc-
|
||||||
|
umentation.
|
||||||
|
|
||||||
|
|
||||||
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
|
PCRE2GREP OPTIONS FOR COMPRESSED FILE SUPPORT
|
||||||
|
|
||||||
By default, pcre2grep reads all files as plain text. You can build it
|
By default, pcre2grep reads all files as plain text. You can build it
|
||||||
so that it recognizes files whose names end in .gz or .bz2, and reads
|
so that it recognizes files whose names end in .gz or .bz2, and reads
|
||||||
them with libz or libbz2, respectively, by adding one or both of
|
them with libz or libbz2, respectively, by adding one or both of
|
||||||
|
|
||||||
--enable-pcre2grep-libz
|
--enable-pcre2grep-libz
|
||||||
--enable-pcre2grep-libbz2
|
--enable-pcre2grep-libbz2
|
||||||
|
|
||||||
to the configure command. These options naturally require that the rel-
|
to the configure command. These options naturally require that the rel-
|
||||||
evant libraries are installed on your system. Configuration will fail
|
evant libraries are installed on your system. Configuration will fail
|
||||||
if they are not.
|
if they are not.
|
||||||
|
|
||||||
|
|
||||||
PCRE2GREP BUFFER SIZE
|
PCRE2GREP BUFFER SIZE
|
||||||
|
|
||||||
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
pcre2grep uses an internal buffer to hold a "window" on the file it is
|
||||||
scanning, in order to be able to output "before" and "after" lines when
|
scanning, in order to be able to output "before" and "after" lines when
|
||||||
it finds a match. The default starting size of the buffer is 20KiB. The
|
it finds a match. The default starting size of the buffer is 20KiB. The
|
||||||
buffer itself is three times this size, but because of the way it is
|
buffer itself is three times this size, but because of the way it is
|
||||||
used for holding "before" lines, the longest line that is guaranteed to
|
used for holding "before" lines, the longest line that is guaranteed to
|
||||||
be processable is the notional buffer size. If a longer line is encoun-
|
be processable is the notional buffer size. If a longer line is encoun-
|
||||||
tered, pcre2grep automatically expands the buffer, up to a specified
|
tered, pcre2grep automatically expands the buffer, up to a specified
|
||||||
maximum size, whose default is 1MiB or the starting size, whichever is
|
maximum size, whose default is 1MiB or the starting size, whichever is
|
||||||
the larger. You can change the default parameter values by adding, for
|
the larger. You can change the default parameter values by adding, for
|
||||||
example,
|
example,
|
||||||
|
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
--with-pcre2grep-max-bufsize=2097152
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
|
|
||||||
to the configure command. The caller of pcre2grep can override these
|
to the configure command. The caller of pcre2grep can override these
|
||||||
values by using --buffer-size and --max-buffer-size on the command
|
values by using --buffer-size and --max-buffer-size on the command
|
||||||
line.
|
line.
|
||||||
|
|
||||||
|
|
||||||
|
@ -4076,26 +4079,26 @@ PCRE2TEST OPTION FOR LIBREADLINE SUPPORT
|
||||||
--enable-pcre2test-libreadline
|
--enable-pcre2test-libreadline
|
||||||
--enable-pcre2test-libedit
|
--enable-pcre2test-libedit
|
||||||
|
|
||||||
to the configure command, pcre2test is linked with the libreadline
|
to the configure command, pcre2test is linked with the libreadline
|
||||||
or libedit library, respectively, and when its input is from a terminal,
|
or libedit library, respectively, and when its input is from a terminal,
|
||||||
it reads it using the readline() function. This provides line-editing
|
it reads it using the readline() function. This provides line-editing
|
||||||
and history facilities. Note that libreadline is GPL-licensed, so if
|
and history facilities. Note that libreadline is GPL-licensed, so if
|
||||||
you distribute a binary of pcre2test linked in this way, there may be
|
you distribute a binary of pcre2test linked in this way, there may be
|
||||||
licensing issues. These can be avoided by linking instead with libedit,
|
licensing issues. These can be avoided by linking instead with libedit,
|
||||||
which has a BSD licence.
|
which has a BSD licence.
|
||||||
|
|
||||||
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
Setting --enable-pcre2test-libreadline causes the -lreadline option to
|
||||||
be added to the pcre2test build. In many operating environments with a
|
be added to the pcre2test build. In many operating environments with a
|
||||||
system-installed readline library this is sufficient. However, in some
|
system-installed readline library this is sufficient. However, in some
|
||||||
environments (e.g. if an unmodified distribution version of readline is
|
environments (e.g. if an unmodified distribution version of readline is
|
||||||
in use), some extra configuration may be necessary. The INSTALL file
|
in use), some extra configuration may be necessary. The INSTALL file
|
||||||
for libreadline says this:
|
for libreadline says this:
|
||||||
|
|
||||||
"Readline uses the termcap functions, but does not link with
|
"Readline uses the termcap functions, but does not link with
|
||||||
the termcap or curses library itself, allowing applications
|
the termcap or curses library itself, allowing applications
|
||||||
which link with readline the to choose an appropriate library."
|
which link with readline the to choose an appropriate library."
|
||||||
|
|
||||||
If your environment has not been set up so that an appropriate library
|
If your environment has not been set up so that an appropriate library
|
||||||
is automatically included, you may need to add something like
|
is automatically included, you may need to add something like
|
||||||
|
|
||||||
LIBS="-lncurses"
|
LIBS="-lncurses"
|
||||||
|
@ -4109,7 +4112,7 @@ INCLUDING DEBUGGING CODE
|
||||||
|
|
||||||
--enable-debug
|
--enable-debug
|
||||||
|
|
||||||
to the configure command, additional debugging code is included in the
|
to the configure command, additional debugging code is included in the
|
||||||
build. This feature is intended for use by the PCRE2 maintainers.
|
build. This feature is intended for use by the PCRE2 maintainers.
|
||||||
|
|
||||||
|
|
||||||
|
@ -4119,15 +4122,15 @@ DEBUGGING WITH VALGRIND SUPPORT
|
||||||
|
|
||||||
--enable-valgrind
|
--enable-valgrind
|
||||||
|
|
||||||
to the configure command, PCRE2 will use valgrind annotations to mark
|
to the configure command, PCRE2 will use valgrind annotations to mark
|
||||||
certain memory regions as unaddressable. This allows it to detect
|
certain memory regions as unaddressable. This allows it to detect
|
||||||
invalid memory accesses, and is mostly useful for debugging PCRE2
|
invalid memory accesses, and is mostly useful for debugging PCRE2
|
||||||
itself.
|
itself.
|
||||||
|
|
||||||
|
|
||||||
CODE COVERAGE REPORTING
|
CODE COVERAGE REPORTING
|
||||||
|
|
||||||
If your C compiler is gcc, you can build a version of PCRE2 that can
|
If your C compiler is gcc, you can build a version of PCRE2 that can
|
||||||
generate a code coverage report for its test suite. To enable this, you
|
generate a code coverage report for its test suite. To enable this, you
|
||||||
must install lcov version 1.6 or above. Then specify
|
must install lcov version 1.6 or above. Then specify
|
||||||
|
|
||||||
|
@ -4136,20 +4139,20 @@ CODE COVERAGE REPORTING
|
||||||
to the configure command and build PCRE2 in the usual way.
|
to the configure command and build PCRE2 in the usual way.
|
||||||
|
|
||||||
Note that using ccache (a caching C compiler) is incompatible with code
|
Note that using ccache (a caching C compiler) is incompatible with code
|
||||||
coverage reporting. If you have configured ccache to run automatically
|
coverage reporting. If you have configured ccache to run automatically
|
||||||
on your system, you must set the environment variable
|
on your system, you must set the environment variable
|
||||||
|
|
||||||
CCACHE_DISABLE=1
|
CCACHE_DISABLE=1
|
||||||
|
|
||||||
before running make to build PCRE2, so that ccache is not used.
|
before running make to build PCRE2, so that ccache is not used.
|
||||||
|
|
||||||
When --enable-coverage is used, the following additional targets are
|
When --enable-coverage is used, the following additional targets are
|
||||||
added to the Makefile:
|
added to the Makefile:
|
||||||
|
|
||||||
make coverage
|
make coverage
|
||||||
|
|
||||||
This creates a fresh coverage report for the PCRE2 test suite. It is
|
This creates a fresh coverage report for the PCRE2 test suite. It is
|
||||||
equivalent to running "make coverage-reset", "make coverage-baseline",
|
equivalent to running "make coverage-reset", "make coverage-baseline",
|
||||||
"make check", and then "make coverage-report".
|
"make check", and then "make coverage-report".
|
||||||
|
|
||||||
make coverage-reset
|
make coverage-reset
|
||||||
|
@ -4166,28 +4169,28 @@ CODE COVERAGE REPORTING
|
||||||
|
|
||||||
make coverage-clean-report
|
make coverage-clean-report
|
||||||
|
|
||||||
This removes the generated coverage report without cleaning the cover-
|
This removes the generated coverage report without cleaning the cover-
|
||||||
age data itself.
|
age data itself.
|
||||||
|
|
||||||
make coverage-clean-data
|
make coverage-clean-data
|
||||||
|
|
||||||
This removes the captured coverage data without removing the coverage
|
This removes the captured coverage data without removing the coverage
|
||||||
files created at compile time (*.gcno).
|
files created at compile time (*.gcno).
|
||||||
|
|
||||||
make coverage-clean
|
make coverage-clean
|
||||||
|
|
||||||
This cleans all coverage data including the generated coverage report.
|
This cleans all coverage data including the generated coverage report.
|
||||||
For more information about code coverage, see the gcov and lcov docu-
|
For more information about code coverage, see the gcov and lcov docu-
|
||||||
mentation.
|
mentation.
|
||||||
|
|
||||||
|
|
||||||
DISABLING THE Z AND T FORMATTING MODIFIERS
|
DISABLING THE Z AND T FORMATTING MODIFIERS
|
||||||
|
|
||||||
The C99 standard defines formatting modifiers z and t for size_t and
|
The C99 standard defines formatting modifiers z and t for size_t and
|
||||||
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
ptrdiff_t values, respectively. By default, PCRE2 uses these modifiers
|
||||||
in environments other than Microsoft Visual Studio when __STDC_VER-
|
in environments other than Microsoft Visual Studio when __STDC_VER-
|
||||||
SION__ is defined and has a value greater than or equal to 199901L
|
SION__ is defined and has a value greater than or equal to 199901L
|
||||||
(indicating C99). However, there is at least one environment that
|
(indicating C99). However, there is at least one environment that
|
||||||
claims to be C99 but does not support these modifiers. If
|
claims to be C99 but does not support these modifiers. If
|
||||||
|
|
||||||
--disable-percent-zt
|
--disable-percent-zt
|
||||||
|
@ -4198,39 +4201,39 @@ DISABLING THE Z AND T FORMATTING MODIFIERS
|
||||||
|
|
||||||
SUPPORT FOR FUZZERS
|
SUPPORT FOR FUZZERS
|
||||||
|
|
||||||
There is a special option for use by people who want to run fuzzing
|
There is a special option for use by people who want to run fuzzing
|
||||||
tests on PCRE2:
|
tests on PCRE2:
|
||||||
|
|
||||||
--enable-fuzz-support
|
--enable-fuzz-support
|
||||||
|
|
||||||
At present this applies only to the 8-bit library. If set, it causes an
|
At present this applies only to the 8-bit library. If set, it causes an
|
||||||
extra library called libpcre2-fuzzsupport.a to be built, but not
|
extra library called libpcre2-fuzzsupport.a to be built, but not
|
||||||
installed. This contains a single function called LLVMFuzzerTestOneIn-
|
installed. This contains a single function called LLVMFuzzerTestOneIn-
|
||||||
put() whose arguments are a pointer to a string and the length of the
|
put() whose arguments are a pointer to a string and the length of the
|
||||||
string. When called, this function tries to compile the string as a
|
string. When called, this function tries to compile the string as a
|
||||||
pattern, and if that succeeds, to match it. This is done both with no
|
pattern, and if that succeeds, to match it. This is done both with no
|
||||||
options and with some random options bits that are generated from the
|
options and with some random options bits that are generated from the
|
||||||
string.
|
string.
|
||||||
|
|
||||||
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
Setting --enable-fuzz-support also causes a binary called pcre2fuz-
|
||||||
zcheck to be created. This is normally run under valgrind or used when
|
zcheck to be created. This is normally run under valgrind or used when
|
||||||
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
|
PCRE2 is compiled with address sanitizing enabled. It calls the fuzzing
|
||||||
function and outputs information about what it is doing. The input
|
function and outputs information about what it is doing. The input
|
||||||
strings are specified by arguments: if an argument starts with "=" the
|
strings are specified by arguments: if an argument starts with "=" the
|
||||||
rest of it is a literal input string. Otherwise, it is assumed to be a
|
rest of it is a literal input string. Otherwise, it is assumed to be a
|
||||||
file name, and the contents of the file are the test string.
|
file name, and the contents of the file are the test string.
|
||||||
|
|
||||||
|
|
||||||
OBSOLETE OPTION
|
OBSOLETE OPTION
|
||||||
|
|
||||||
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
In versions of PCRE2 prior to 10.30, there were two ways of handling
|
||||||
backtracking in the pcre2_match() function. The default was to use the
|
backtracking in the pcre2_match() function. The default was to use the
|
||||||
system stack, but if
|
system stack, but if
|
||||||
|
|
||||||
--disable-stack-for-recursion
|
--disable-stack-for-recursion
|
||||||
|
|
||||||
was set, memory on the heap was used. From release 10.30 onwards this
|
was set, memory on the heap was used. From release 10.30 onwards this
|
||||||
has changed (the stack is no longer used) and this option now does
|
has changed (the stack is no longer used) and this option now does
|
||||||
nothing except give a warning.
|
nothing except give a warning.
|
||||||
|
|
||||||
|
|
||||||
|
@ -4248,11 +4251,11 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 15 November 2018
|
Last updated: 03 March 2019
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
PCRE2CALLOUT(3) Library Functions Manual PCRE2CALLOUT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4682,8 +4685,8 @@ REVISION
|
||||||
Last updated: 03 February 2019
|
Last updated: 03 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
PCRE2COMPAT(3) Library Functions Manual PCRE2COMPAT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -4887,8 +4890,8 @@ REVISION
|
||||||
Last updated: 12 February 2019
|
Last updated: 12 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
PCRE2JIT(3) Library Functions Manual PCRE2JIT(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5287,8 +5290,8 @@ REVISION
|
||||||
Last updated: 16 October 2018
|
Last updated: 16 October 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
PCRE2LIMITS(3) Library Functions Manual PCRE2LIMITS(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5357,8 +5360,8 @@ REVISION
|
||||||
Last updated: 02 February 2019
|
Last updated: 02 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
PCRE2MATCHING(3) Library Functions Manual PCRE2MATCHING(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5578,8 +5581,8 @@ REVISION
|
||||||
Last updated: 10 October 2018
|
Last updated: 10 October 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
PCRE2PARTIAL(3) Library Functions Manual PCRE2PARTIAL(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -6018,8 +6021,8 @@ REVISION
|
||||||
Last updated: 22 December 2014
|
Last updated: 22 December 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
PCRE2PATTERN(3) Library Functions Manual PCRE2PATTERN(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -9362,8 +9365,8 @@ REVISION
|
||||||
Last updated: 12 February 2019
|
Last updated: 12 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
PCRE2PERFORM(3) Library Functions Manual PCRE2PERFORM(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -9597,8 +9600,8 @@ REVISION
|
||||||
Last updated: 03 February 2019
|
Last updated: 03 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
PCRE2POSIX(3) Library Functions Manual PCRE2POSIX(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -9927,8 +9930,8 @@ REVISION
|
||||||
Last updated: 30 January 2019
|
Last updated: 30 January 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
PCRE2SAMPLE(3) Library Functions Manual PCRE2SAMPLE(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10206,8 +10209,8 @@ REVISION
|
||||||
Last updated: 27 June 2018
|
Last updated: 27 June 2018
|
||||||
Copyright (c) 1997-2018 University of Cambridge.
|
Copyright (c) 1997-2018 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
PCRE2SYNTAX(3) Library Functions Manual PCRE2SYNTAX(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -10707,8 +10710,8 @@ REVISION
|
||||||
Last updated: 11 February 2019
|
Last updated: 11 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
PCRE2UNICODE(3) Library Functions Manual PCRE2UNICODE(3)
|
||||||
|
|
||||||
|
|
||||||
|
@ -11079,5 +11082,5 @@ REVISION
|
||||||
Last updated: 03 February 2019
|
Last updated: 03 February 2019
|
||||||
Copyright (c) 1997-2019 University of Cambridge.
|
Copyright (c) 1997-2019 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -75,7 +75,7 @@ PCRE2_UTF, PCRE2_UCP and related options.
|
||||||
.P
|
.P
|
||||||
Additional options may be set in the compile context via the
|
Additional options may be set in the compile context via the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2_set_compile_extra_options\fP
|
\fBpcre2_set_compile_extra_options\fP
|
||||||
.\"
|
.\"
|
||||||
function.
|
function.
|
||||||
.P
|
.P
|
||||||
|
|
|
@ -40,7 +40,7 @@ characters. The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
On success, make a private subject copy
|
On success, make a private subject copy
|
||||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject is not the end of a line
|
PCRE2_NOTEOL Subject is not the end of a line
|
||||||
|
|
|
@ -49,7 +49,7 @@ terminated by a binary zero code unit. The options are:
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
On success, make a private subject copy
|
On success, make a private subject copy
|
||||||
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
PCRE2_ENDANCHORED Pattern can match only at end of subject
|
||||||
PCRE2_NOTBOL Subject string is not the beginning of a line
|
PCRE2_NOTBOL Subject string is not the beginning of a line
|
||||||
PCRE2_NOTEOL Subject string is not the end of a line
|
PCRE2_NOTEOL Subject string is not the end of a line
|
||||||
|
|
|
@ -18,7 +18,7 @@ If \fImatch_data\fP is NULL, this function does nothing. Otherwise,
|
||||||
using the memory freeing function from the general context or compiled pattern
|
using the memory freeing function from the general context or compiled pattern
|
||||||
with which it was created, or \fBfree()\fP if that was not set.
|
with which it was created, or \fBfree()\fP if that was not set.
|
||||||
.P
|
.P
|
||||||
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
If the PCRE2_COPY_MATCHED_SUBJECT option was used for a successful match using this
|
||||||
match data block, the copy of the subject that was remembered with the block is
|
match data block, the copy of the subject that was remembered with the block is
|
||||||
also freed.
|
also freed.
|
||||||
.P
|
.P
|
||||||
|
|
|
@ -23,7 +23,7 @@ options are:
|
||||||
in UTF-8 and UTF-32 modes
|
in UTF-8 and UTF-32 modes
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and \ex
|
PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and \ex
|
||||||
handling
|
handling
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as
|
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL Treat all invalid escapes as
|
||||||
a literal following character
|
a literal following character
|
||||||
|
|
|
@ -247,7 +247,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.sp
|
.sp
|
||||||
.B const unsigned char *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
|
.B const unsigned char *pcre2_maketables(pcre2_general_context *\fIgcontext\fP);
|
||||||
.sp
|
.sp
|
||||||
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
|
.B int pcre2_pattern_info(const pcre2_code *\fIcode\fP, uint32_t \fIwhat\fP,
|
||||||
.B " void *\fIwhere\fP);"
|
.B " void *\fIwhere\fP);"
|
||||||
.sp
|
.sp
|
||||||
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
.B int pcre2_callout_enumerate(const pcre2_code *\fIcode\fP,
|
||||||
|
@ -1244,7 +1244,7 @@ until after all operations on the
|
||||||
.\" </a>
|
.\" </a>
|
||||||
match data block
|
match data block
|
||||||
.\"
|
.\"
|
||||||
have taken place, unless, in the case of the subject string, you have used the
|
have taken place, unless, in the case of the subject string, you have used the
|
||||||
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
PCRE2_COPY_MATCHED_SUBJECT option, which is described in the section entitled
|
||||||
"Option bits for \fBpcre2_match()\fP"
|
"Option bits for \fBpcre2_match()\fP"
|
||||||
.\" HTML <a href="#matchoptions">
|
.\" HTML <a href="#matchoptions">
|
||||||
|
@ -1375,8 +1375,8 @@ the PCRE2_EXTRA_ALT_BSUX extra option (see "Extra compile options"
|
||||||
.\" </a>
|
.\" </a>
|
||||||
below).
|
below).
|
||||||
.\"
|
.\"
|
||||||
Note that this alternative escape handling applies only to patterns. Neither of
|
Note that this alternative escape handling applies only to patterns. Neither of
|
||||||
these options affects the processing of replacement strings passed to
|
these options affects the processing of replacement strings passed to
|
||||||
\fBpcre2_substitute()\fP.
|
\fBpcre2_substitute()\fP.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ALT_CIRCUMFLEX
|
PCRE2_ALT_CIRCUMFLEX
|
||||||
|
@ -1832,10 +1832,10 @@ characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_EXTRA_ALT_BSUX
|
PCRE2_EXTRA_ALT_BSUX
|
||||||
.sp
|
.sp
|
||||||
The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in
|
The original option PCRE2_ALT_BSUX causes PCRE2 to process \eU, \eu, and \ex in
|
||||||
the way that ECMAScript (aka JavaScript) does. Additional functionality was
|
the way that ECMAScript (aka JavaScript) does. Additional functionality was
|
||||||
defined by ECMAScript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
defined by ECMAScript 6; setting PCRE2_EXTRA_ALT_BSUX has the effect of
|
||||||
PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
|
PCRE2_ALT_BSUX, but in addition it recognizes \eu{hhh..} as a hexadecimal
|
||||||
character code, where hhh.. is any number of hexadecimal digits.
|
character code, where hhh.. is any number of hexadecimal digits.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL
|
||||||
|
@ -1852,7 +1852,7 @@ If the PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL extra option is passed to
|
||||||
\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are
|
\fBpcre2_compile()\fP, all unrecognized or malformed escape sequences are
|
||||||
treated as single-character escapes. For example, \ej is a literal "j" and
|
treated as single-character escapes. For example, \ej is a literal "j" and
|
||||||
\ex{2z} is treated as the literal string "x{2z}". Setting this option means
|
\ex{2z} is treated as the literal string "x{2z}". Setting this option means
|
||||||
that typos in patterns may go undetected and have unexpected results. Also note
|
that typos in patterns may go undetected and have unexpected results. Also note
|
||||||
that a sequence such as [\eN{] is interpreted as a malformed attempt at
|
that a sequence such as [\eN{] is interpreted as a malformed attempt at
|
||||||
[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an
|
[\eN{...}] and so is treated as [N{] whereas [\eN] gives an error because an
|
||||||
unqualified \eN is a valid escape sequence but is not supported in a character
|
unqualified \eN is a valid escape sequence but is not supported in a character
|
||||||
|
@ -1860,9 +1860,9 @@ class. To reiterate: this is a dangerous option. Use with great care.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||||
.sp
|
.sp
|
||||||
There are some legacy applications where the escape sequence \er in a pattern
|
There are some legacy applications where the escape sequence \er in a pattern
|
||||||
is expected to match a newline. If this option is set, \er in a pattern is
|
is expected to match a newline. If this option is set, \er in a pattern is
|
||||||
converted to \en so that it matches a LF (linefeed) instead of a CR (carriage
|
converted to \en so that it matches a LF (linefeed) instead of a CR (carriage
|
||||||
return) character. The option does not affect a literal CR in the pattern, nor
|
return) character. The option does not affect a literal CR in the pattern, nor
|
||||||
does it affect CR specified as an explicit code point such as \ex{0D}.
|
does it affect CR specified as an explicit code point such as \ex{0D}.
|
||||||
.sp
|
.sp
|
||||||
|
@ -2547,7 +2547,7 @@ the use of .* with PCRE2_DOTALL, not by starting the pattern with ^ or \eA.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be
|
The unused bits of the \fIoptions\fP argument for \fBpcre2_match()\fP must be
|
||||||
zero. The only bits that may be set are PCRE2_ANCHORED,
|
zero. The only bits that may be set are PCRE2_ANCHORED,
|
||||||
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
PCRE2_COPY_MATCHED_SUBJECT, PCRE2_ENDANCHORED, PCRE2_NOTBOL, PCRE2_NOTEOL,
|
||||||
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
PCRE2_NOTEMPTY, PCRE2_NOTEMPTY_ATSTART, PCRE2_NO_JIT, PCRE2_NO_UTF_CHECK,
|
||||||
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
PCRE2_PARTIAL_HARD, and PCRE2_PARTIAL_SOFT. Their action is described below.
|
||||||
|
@ -2567,8 +2567,8 @@ matching.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_COPY_MATCHED_SUBJECT
|
PCRE2_COPY_MATCHED_SUBJECT
|
||||||
.sp
|
.sp
|
||||||
By default, a pointer to the subject is remembered in the match data block so
|
By default, a pointer to the subject is remembered in the match data block so
|
||||||
that, after a successful match, it can be referenced by the substring
|
that, after a successful match, it can be referenced by the substring
|
||||||
extraction functions. This means that the subject's memory must not be freed
|
extraction functions. This means that the subject's memory must not be freed
|
||||||
until all such operations are complete. For some applications where the
|
until all such operations are complete. For some applications where the
|
||||||
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
lifetime of the subject string is not guaranteed, it may be necessary to make a
|
||||||
|
@ -2868,8 +2868,8 @@ undefined.
|
||||||
.P
|
.P
|
||||||
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
After a successful match, a partial match (PCRE2_ERROR_PARTIAL), or a failure
|
||||||
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
to match (PCRE2_ERROR_NOMATCH), a mark name may be available. The function
|
||||||
\fBpcre2_get_mark()\fP can be called to access this name, which can be
|
\fBpcre2_get_mark()\fP can be called to access this name, which can be
|
||||||
specified in the pattern by any of the backtracking control verbs, not just
|
specified in the pattern by any of the backtracking control verbs, not just
|
||||||
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
(*MARK). The same function applies to all the verbs. It returns a pointer to
|
||||||
the zero-terminated name, which is within the compiled pattern. If no name is
|
the zero-terminated name, which is within the compiled pattern. If no name is
|
||||||
available, NULL is returned. The length of the name (excluding the terminating
|
available, NULL is returned. The length of the name (excluding the terminating
|
||||||
|
@ -3016,7 +3016,7 @@ The backtracking match limit was reached.
|
||||||
If a pattern contains many nested backtracking points, heap memory is used to
|
If a pattern contains many nested backtracking points, heap memory is used to
|
||||||
remember them. This error is given when the memory allocation function (default
|
remember them. This error is given when the memory allocation function (default
|
||||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||||
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
if the amount of memory needed exceeds the heap limit. PCRE2_ERROR_NOMEMORY is
|
||||||
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
also returned if PCRE2_COPY_MATCHED_SUBJECT is set and memory allocation fails.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
|
@ -3407,7 +3407,7 @@ capture groups and letters within \eQ...\eE quoted sequences.
|
||||||
.P
|
.P
|
||||||
Note that case forcing sequences such as \eU...\eE do not nest. For example,
|
Note that case forcing sequences such as \eU...\eE do not nest. For example,
|
||||||
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
|
the result of processing "\eUaa\eLBB\eEcc\eE" is "AAbbcc"; the final \eE has no
|
||||||
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EXTRA_ALT_BSUX options do
|
||||||
not apply to replacement strings.
|
not apply to replacement strings.
|
||||||
.P
|
.P
|
||||||
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more
|
||||||
|
@ -3439,7 +3439,7 @@ The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended
|
||||||
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
|
substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause unknown
|
||||||
groups in the extended syntax forms to be treated as unset.
|
groups in the extended syntax forms to be treated as unset.
|
||||||
.P
|
.P
|
||||||
If successful, \fBpcre2_substitute()\fP returns the number of successful
|
If successful, \fBpcre2_substitute()\fP returns the number of successful
|
||||||
matches. This may be zero if no matches were found, and is never greater than 1
|
matches. This may be zero if no matches were found, and is never greater than 1
|
||||||
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
unless PCRE2_SUBSTITUTE_GLOBAL is set.
|
||||||
.P
|
.P
|
||||||
|
@ -3487,8 +3487,8 @@ above).
|
||||||
.sp
|
.sp
|
||||||
The \fBpcre2_set_substitution_callout()\fP function can be used to specify a
|
The \fBpcre2_set_substitution_callout()\fP function can be used to specify a
|
||||||
callout function for \fBpcre2_substitute()\fP. This information is passed in
|
callout function for \fBpcre2_substitute()\fP. This information is passed in
|
||||||
a match context. The callout function is called after each substitution has
|
a match context. The callout function is called after each substitution has
|
||||||
been processed, but it can cause the replacement not to happen. The callout
|
been processed, but it can cause the replacement not to happen. The callout
|
||||||
function is not called for simulated substitutions that happen as a result of
|
function is not called for simulated substitutions that happen as a result of
|
||||||
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option.
|
||||||
.P
|
.P
|
||||||
|
@ -3497,10 +3497,10 @@ block structure, which contains the following fields, not necessarily in this
|
||||||
order:
|
order:
|
||||||
.sp
|
.sp
|
||||||
uint32_t \fIversion\fP;
|
uint32_t \fIversion\fP;
|
||||||
uint32_t \fIsubscount\fP;
|
uint32_t \fIsubscount\fP;
|
||||||
PCRE2_SPTR \fIinput\fP;
|
PCRE2_SPTR \fIinput\fP;
|
||||||
PCRE2_SPTR \fIoutput\fP;
|
PCRE2_SPTR \fIoutput\fP;
|
||||||
PCRE2_SIZE \fI*ovector\fP;
|
PCRE2_SIZE \fI*ovector\fP;
|
||||||
uint32_t \fIoveccount\fP;
|
uint32_t \fIoveccount\fP;
|
||||||
PCRE2_SIZE \fIoutput_offsets[2]\fP;
|
PCRE2_SIZE \fIoutput_offsets[2]\fP;
|
||||||
.sp
|
.sp
|
||||||
|
@ -3512,9 +3512,9 @@ The \fIsubscount\fP field is the number of the current match. It is 1 for the
|
||||||
first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP
|
first callout, 2 for the second, and so on. The \fIinput\fP and \fIoutput\fP
|
||||||
pointers are copies of the values passed to \fBpcre2_substitute()\fP.
|
pointers are copies of the values passed to \fBpcre2_substitute()\fP.
|
||||||
.P
|
.P
|
||||||
The \fIovector\fP field points to the ovector, which contains the result of the
|
The \fIovector\fP field points to the ovector, which contains the result of the
|
||||||
most recent match. The \fIoveccount\fP field contains the number of pairs that
|
most recent match. The \fIoveccount\fP field contains the number of pairs that
|
||||||
are set in the ovector, and is always greater than zero.
|
are set in the ovector, and is always greater than zero.
|
||||||
.P
|
.P
|
||||||
The \fIoutput_offsets\fP vector contains the offsets of the replacement in the
|
The \fIoutput_offsets\fP vector contains the offsets of the replacement in the
|
||||||
output string. This has already been processed for dollar and (if requested)
|
output string. This has already been processed for dollar and (if requested)
|
||||||
|
|
|
@ -33,7 +33,7 @@ is described in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2api\fP
|
\fBpcre2api\fP
|
||||||
.\"
|
.\"
|
||||||
documentation; the rest of this document is concerned with callouts during
|
documentation; the rest of this document is concerned with callouts during
|
||||||
pattern matching.
|
pattern matching.
|
||||||
.P
|
.P
|
||||||
Within a regular expression, (?C<arg>) indicates a point at which the external
|
Within a regular expression, (?C<arg>) indicates a point at which the external
|
||||||
|
|
|
@ -778,8 +778,8 @@ only callouts with string arguments are useful.
|
||||||
.SS "Calling external programs or scripts"
|
.SS "Calling external programs or scripts"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This facility can be independently disabled when \fBpcre2grep\fP is built. It
|
This facility can be independently disabled when \fBpcre2grep\fP is built. It
|
||||||
is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
|
is supported for Windows, where a call to \fB_spawnvp()\fP is used, for VMS,
|
||||||
where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
|
where \fBlib$spawn()\fP is used, and for any other Unix-like environment where
|
||||||
\fBfork()\fP and \fBexecv()\fP are available.
|
\fBfork()\fP and \fBexecv()\fP are available.
|
||||||
.P
|
.P
|
||||||
|
|
|
@ -390,12 +390,12 @@ two compile-time options. If PCRE2_ALT_BSUX is set, the sequence \ex followed
|
||||||
by { is not recognized. Only if \ex is followed by two hexadecimal digits is it
|
by { is not recognized. Only if \ex is followed by two hexadecimal digits is it
|
||||||
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
recognized as a character escape. Otherwise it is interpreted as a literal "x"
|
||||||
character. In this mode, support for code points greater than 256 is provided
|
character. In this mode, support for code points greater than 256 is provided
|
||||||
by \eu, which must be followed by four hexadecimal digits; otherwise it is
|
by \eu, which must be followed by four hexadecimal digits; otherwise it is
|
||||||
interpreted as a literal "u" character.
|
interpreted as a literal "u" character.
|
||||||
.P
|
.P
|
||||||
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
PCRE2_EXTRA_ALT_BSUX has the same effect as PCRE2_ALT_BSUX and, in addition,
|
||||||
\eu{hhh..} is recognized as the character specified by hexadecimal code point.
|
\eu{hhh..} is recognized as the character specified by hexadecimal code point.
|
||||||
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
There may be any number of hexadecimal digits. This syntax is from ECMAScript
|
||||||
6.
|
6.
|
||||||
.P
|
.P
|
||||||
The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
|
The \eN{U+hhh..} escape sequence is recognized only when the PCRE2_UTF option
|
||||||
|
@ -1188,7 +1188,7 @@ character. If any other of these assertions appears in a character class, an
|
||||||
A word boundary is a position in the subject string where the current character
|
A word boundary is a position in the subject string where the current character
|
||||||
and the previous character do not both match \ew or \eW (i.e. one matches
|
and the previous character do not both match \ew or \eW (i.e. one matches
|
||||||
\ew and the other matches \eW), or the start or end of the string if the
|
\ew and the other matches \eW), or the start or end of the string if the
|
||||||
first or last character matches \ew, respectively. When PCRE2 is built with
|
first or last character matches \ew, respectively. When PCRE2 is built with
|
||||||
Unicode support, the meanings of \ew and \eW can be changed by setting the
|
Unicode support, the meanings of \ew and \eW can be changed by setting the
|
||||||
PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2
|
PCRE2_UCP option. When this is done, it also affects \eb and \eB. Neither PCRE2
|
||||||
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
nor Perl has a separate "start of word" or "end of word" metasequence. However,
|
||||||
|
|
|
@ -29,12 +29,12 @@ and 32-bit libraries. See the
|
||||||
\fBpcre2api\fP
|
\fBpcre2api\fP
|
||||||
.\"
|
.\"
|
||||||
documentation for a description of PCRE2's native API, which contains much
|
documentation for a description of PCRE2's native API, which contains much
|
||||||
additional functionality.
|
additional functionality.
|
||||||
.P
|
.P
|
||||||
The functions described here are wrapper functions that ultimately call the
|
The functions described here are wrapper functions that ultimately call the
|
||||||
PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
|
PCRE2 native API. Their prototypes are defined in the \fBpcre2posix.h\fP header
|
||||||
file, and they all have unique names starting with \fBpcre2_\fP. However, the
|
file, and they all have unique names starting with \fBpcre2_\fP. However, the
|
||||||
\fBpcre2posix.h\fP header also contains macro definitions that convert the
|
\fBpcre2posix.h\fP header also contains macro definitions that convert the
|
||||||
standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
|
standard POSIX names such as \fBregcomp()\fP into \fBpcre2_regcomp()\fP etc. This
|
||||||
means that a program can use the usual POSIX names without running the risk of
|
means that a program can use the usual POSIX names without running the risk of
|
||||||
accidentally linking with POSIX functions from a different library.
|
accidentally linking with POSIX functions from a different library.
|
||||||
|
@ -44,7 +44,7 @@ can be accessed by adding \fB-lpcre2-posix\fP to the command for linking an
|
||||||
application. Because the POSIX functions call the native ones, it is also
|
application. Because the POSIX functions call the native ones, it is also
|
||||||
necessary to add \fB-lpcre2-8\fP.
|
necessary to add \fB-lpcre2-8\fP.
|
||||||
.P
|
.P
|
||||||
Although they are not defined as prototypes in \fBpcre2posix.h\fP, the library
|
Although they are not defined as prototypes in \fBpcre2posix.h\fP, the library
|
||||||
does contain functions with the POSIX names \fBregcomp()\fP etc. These simply
|
does contain functions with the POSIX names \fBregcomp()\fP etc. These simply
|
||||||
pass their arguments to the PCRE2 functions. These functions are provided for
|
pass their arguments to the PCRE2 functions. These functions are provided for
|
||||||
backwards compatibility with earlier versions of PCRE2, so that existing
|
backwards compatibility with earlier versions of PCRE2, so that existing
|
||||||
|
|
|
@ -22,7 +22,7 @@ documentation. This document contains a quick-reference summary of the syntax.
|
||||||
.SH "ESCAPED CHARACTERS"
|
.SH "ESCAPED CHARACTERS"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
This table applies to ASCII and Unicode environments. An unrecognized escape
|
This table applies to ASCII and Unicode environments. An unrecognized escape
|
||||||
sequence causes an error.
|
sequence causes an error.
|
||||||
.sp
|
.sp
|
||||||
\ea alarm, that is, the BEL character (hex 07)
|
\ea alarm, that is, the BEL character (hex 07)
|
||||||
|
@ -49,7 +49,7 @@ following are also recognized:
|
||||||
When \ex is not followed by {, from zero to two hexadecimal digits are read,
|
When \ex is not followed by {, from zero to two hexadecimal digits are read,
|
||||||
but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be
|
but in ALT_BSUX mode \ex must be followed by two hexadecimal digits to be
|
||||||
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
recognized as a hexadecimal escape; otherwise it matches a literal "x".
|
||||||
Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
Likewise, if \eu (in ALT_BSUX mode) is not followed by four hexadecimal digits
|
||||||
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
or (in EXTRA_ALT_BSUX mode) a sequence of hex digits in curly brackets, it
|
||||||
matches a literal "u".
|
matches a literal "u".
|
||||||
.P
|
.P
|
||||||
|
|
|
@ -565,10 +565,10 @@ for a description of the effects of these options.
|
||||||
/s dotall set PCRE2_DOTALL
|
/s dotall set PCRE2_DOTALL
|
||||||
dupnames set PCRE2_DUPNAMES
|
dupnames set PCRE2_DUPNAMES
|
||||||
endanchored set PCRE2_ENDANCHORED
|
endanchored set PCRE2_ENDANCHORED
|
||||||
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
escaped_cr_is_lf set PCRE2_EXTRA_ESCAPED_CR_IS_LF
|
||||||
/x extended set PCRE2_EXTENDED
|
/x extended set PCRE2_EXTENDED
|
||||||
/xx extended_more set PCRE2_EXTENDED_MORE
|
/xx extended_more set PCRE2_EXTENDED_MORE
|
||||||
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
extra_alt_bsux set PCRE2_EXTRA_ALT_BSUX
|
||||||
firstline set PCRE2_FIRSTLINE
|
firstline set PCRE2_FIRSTLINE
|
||||||
literal set PCRE2_LITERAL
|
literal set PCRE2_LITERAL
|
||||||
match_line set PCRE2_EXTRA_MATCH_LINE
|
match_line set PCRE2_EXTRA_MATCH_LINE
|
||||||
|
@ -1005,7 +1005,7 @@ process.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allvector show the entire ovector
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text
|
allusedtext show all consulted text
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
@ -1013,9 +1013,9 @@ process.
|
||||||
mark show mark values
|
mark show mark values
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show starting character when relevant
|
startchar show starting character when relevant
|
||||||
substitute_callout use substitution callouts
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_skip=<n> skip substitution number n
|
substitute_skip=<n> skip substitution number n
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_stop=<n> skip substitution number n and greater
|
substitute_stop=<n> skip substitution number n and greater
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1160,7 +1160,7 @@ pattern.
|
||||||
aftertext show text after match
|
aftertext show text after match
|
||||||
allaftertext show text after captures
|
allaftertext show text after captures
|
||||||
allcaptures show all captures
|
allcaptures show all captures
|
||||||
allvector show the entire ovector
|
allvector show the entire ovector
|
||||||
allusedtext show all consulted text (non-JIT only)
|
allusedtext show all consulted text (non-JIT only)
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
|
@ -1190,9 +1190,9 @@ pattern.
|
||||||
replace=<string> specify a replacement string
|
replace=<string> specify a replacement string
|
||||||
startchar show startchar when relevant
|
startchar show startchar when relevant
|
||||||
startoffset=<n> same as offset=<n>
|
startoffset=<n> same as offset=<n>
|
||||||
substitute_callout use substitution callouts
|
substitute_callout use substitution callouts
|
||||||
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
substitute_extended use PCRE2_SUBSTITUTE_EXTENDED
|
||||||
substitute_skip=<n> skip substitution number n
|
substitute_skip=<n> skip substitution number n
|
||||||
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
substitute_overflow_length use PCRE2_SUBSTITUTE_OVERFLOW_LENGTH
|
||||||
substitute_stop=<n> skip substitution number n and greater
|
substitute_stop=<n> skip substitution number n and greater
|
||||||
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
substitute_unknown_unset use PCRE2_SUBSTITUTE_UNKNOWN_UNSET
|
||||||
|
@ -1273,9 +1273,9 @@ result, and also for DFA matching, provides a means of checking that there are
|
||||||
no unexpected modifications to ovector fields. Before each match attempt, the
|
no unexpected modifications to ovector fields. Before each match attempt, the
|
||||||
ovector is filled with a special value, and if this is found in both elements
|
ovector is filled with a special value, and if this is found in both elements
|
||||||
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
of a capturing pair, "<unchanged>" is output. After a successful match, this
|
||||||
applies to all groups after the maximum capture group for the pattern. In other
|
applies to all groups after the maximum capture group for the pattern. In other
|
||||||
cases it applies to the entire ovector. After a partial match, the first two
|
cases it applies to the entire ovector. After a partial match, the first two
|
||||||
elements are the only ones that should be set. After a DFA match, the amount of
|
elements are the only ones that should be set. After a DFA match, the amount of
|
||||||
ovector that is used depends on the number of matches that were found.
|
ovector that is used depends on the number of matches that were found.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -1288,13 +1288,13 @@ controlled by various modifiers listed above whose names begin with
|
||||||
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
\fBcallout_\fP. Details are given in the section entitled "Callouts"
|
||||||
.\" HTML <a href="#callouts">
|
.\" HTML <a href="#callouts">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
below.
|
below.
|
||||||
.\"
|
.\"
|
||||||
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
Testing callouts from \fBpcre2_substitute()\fP is described separately in
|
||||||
"Testing the substitution function"
|
"Testing the substitution function"
|
||||||
.\" HTML <a href="#substitution">
|
.\" HTML <a href="#substitution">
|
||||||
.\" </a>
|
.\" </a>
|
||||||
below.
|
below.
|
||||||
.\"
|
.\"
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -1416,14 +1416,14 @@ matching provokes an error return ("bad option value") from
|
||||||
.SS "Testing substitute callouts"
|
.SS "Testing substitute callouts"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
If the \fBsubstitute_callout\fP modifier is set, a substitution callout
|
||||||
function is set up. When it is called (after each substitution), details of the
|
function is set up. When it is called (after each substitution), details of the
|
||||||
input and output strings are output. For example:
|
input and output strings are output. For example:
|
||||||
.sp
|
.sp
|
||||||
/abc/g,replace=<$0>,substitute_callout
|
/abc/g,replace=<$0>,substitute_callout
|
||||||
abcdefabcpqr
|
abcdefabcpqr
|
||||||
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
1(1) Old 0 3 "abc" New 0 5 "<abc>"
|
||||||
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
2(1) Old 6 9 "abc" New 8 13 "<abc>"
|
||||||
2: <abc>def<abc>pqr
|
2: <abc>def<abc>pqr
|
||||||
.sp
|
.sp
|
||||||
The first number on each callout line is the count of matches. The
|
The first number on each callout line is the count of matches. The
|
||||||
|
@ -1432,11 +1432,11 @@ is, one more than the number of capturing groups that were set). Then are
|
||||||
listed the offsets of the old substring, its contents, and the same for the
|
listed the offsets of the old substring, its contents, and the same for the
|
||||||
replacement.
|
replacement.
|
||||||
.P
|
.P
|
||||||
By default, the substitution callout function returns zero, which accepts the
|
By default, the substitution callout function returns zero, which accepts the
|
||||||
replacement and causes matching to continue if /g was used. Two further
|
replacement and causes matching to continue if /g was used. Two further
|
||||||
modifiers can be used to test other return values. If \fBsubstitute_skip\fP is
|
modifiers can be used to test other return values. If \fBsubstitute_skip\fP is
|
||||||
set to a value greater than zero the callout function returns +1 for the match
|
set to a value greater than zero the callout function returns +1 for the match
|
||||||
of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the
|
of that number, and similarly \fBsubstitute_stop\fP returns -1. These cause the
|
||||||
replacement to be rejected, and -1 causes no further matching to take place. If
|
replacement to be rejected, and -1 causes no further matching to take place. If
|
||||||
either of them is set, \fBsubstitute_callout\fP is assumed. For example:
|
either of them is set, \fBsubstitute_callout\fP is assumed. For example:
|
||||||
.sp
|
.sp
|
||||||
|
@ -1449,7 +1449,7 @@ either of them are set, \fBsubstitute_callout\fP is assumed. For example:
|
||||||
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
1(1) Old 0 3 "abc" New 0 5 "<abc> STOPPED"
|
||||||
1: abcdefabcpqr
|
1: abcdefabcpqr
|
||||||
.sp
|
.sp
|
||||||
If both are set for the same number, stop takes precedence. Only a single skip
|
If both are set for the same number, stop takes precedence. Only a single skip
|
||||||
or stop is supported, which is sufficient for testing that the feature works.
|
or stop is supported, which is sufficient for testing that the feature works.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -72,7 +72,7 @@ characters (see the description of \eC in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2pattern\fP
|
\fBpcre2pattern\fP
|
||||||
.\"
|
.\"
|
||||||
documentation). For this reason, there is a build-time option that disables
|
documentation). For this reason, there is a build-time option that disables
|
||||||
support for \eC completely. There is also a less draconian compile-time option
|
support for \eC completely. There is also a less draconian compile-time option
|
||||||
for locking out the use of \eC when a pattern is compiled.
|
for locking out the use of \eC when a pattern is compiled.
|
||||||
.P
|
.P
|
||||||
|
@ -135,13 +135,13 @@ characters that are all from the same Unicode script. However, because some
|
||||||
scripts are commonly used together, and because some diacritical and other
|
scripts are commonly used together, and because some diacritical and other
|
||||||
marks are used with multiple scripts, it is not that simple.
|
marks are used with multiple scripts, it is not that simple.
|
||||||
.P
|
.P
|
||||||
Every Unicode character has a Script property, mostly with a value
|
Every Unicode character has a Script property, mostly with a value
|
||||||
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
corresponding to the name of a script, such as Latin, Greek, or Cyrillic. There
|
||||||
are also three special values:
|
are also three special values:
|
||||||
.P
|
.P
|
||||||
"Unknown" is used for code points that have not been assigned, and also for the
|
"Unknown" is used for code points that have not been assigned, and also for the
|
||||||
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
surrogate code points. In the PCRE2 32-bit library, characters whose code
|
||||||
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
points are greater than the Unicode maximum (U+10FFFF), which are accessible
|
||||||
only in non-UTF mode, are assigned the Unknown script.
|
only in non-UTF mode, are assigned the Unknown script.
|
||||||
.P
|
.P
|
||||||
"Common" is used for characters that are used with many scripts. These include
|
"Common" is used for characters that are used with many scripts. These include
|
||||||
|
@ -152,19 +152,19 @@ digits 0 to 9.
|
||||||
previous character. These are considered to take on the script of the character
|
previous character. These are considered to take on the script of the character
|
||||||
that they modify.
|
that they modify.
|
||||||
.P
|
.P
|
||||||
Some Inherited characters are used with many scripts, but many of them are only
|
Some Inherited characters are used with many scripts, but many of them are only
|
||||||
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
normally used with a small number of scripts. For example, U+102E0 (Coptic
|
||||||
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
Epact thousands mark) is used only with Arabic and Coptic. In order to make it
|
||||||
possible to check this, a Unicode property called Script Extension exists. Its
|
possible to check this, a Unicode property called Script Extension exists. Its
|
||||||
value is a list of scripts that apply to the character. For the majority of
|
value is a list of scripts that apply to the character. For the majority of
|
||||||
characters, the list contains just one script, the same one as the Script
|
characters, the list contains just one script, the same one as the Script
|
||||||
property. However, for characters such as U+102E0 more than one Script is
|
property. However, for characters such as U+102E0 more than one Script is
|
||||||
listed. There are also some Common characters that have a single, non-Common
|
listed. There are also some Common characters that have a single, non-Common
|
||||||
script in their Script Extension list.
|
script in their Script Extension list.
|
||||||
.P
|
.P
|
||||||
The next section describes the basic rules for deciding whether a given string
|
The next section describes the basic rules for deciding whether a given string
|
||||||
of characters is a script run. Note, however, that there are some special cases
|
of characters is a script run. Note, however, that there are some special cases
|
||||||
involving the Chinese Han script, and an additional constraint for decimal
|
involving the Chinese Han script, and an additional constraint for decimal
|
||||||
digits. These are covered in subsequent sections.
|
digits. These are covered in subsequent sections.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -185,16 +185,16 @@ all the sets of scripts must not be empty.
|
||||||
.P
|
.P
|
||||||
A simple example is an Internet name such as "google.com". The letters are all
|
A simple example is an Internet name such as "google.com". The letters are all
|
||||||
in the Latin script, and the dot is Common, so this string is a script run.
|
in the Latin script, and the dot is Common, so this string is a script run.
|
||||||
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
However, the Cyrillic letter "o" looks exactly the same as the Latin "o"; a
|
||||||
string that looks the same, but with Cyrillic "o"s is not a script run.
|
string that looks the same, but with Cyrillic "o"s is not a script run.
|
||||||
.P
|
.P
|
||||||
More interesting examples involve characters with more than one script in their
|
More interesting examples involve characters with more than one script in their
|
||||||
Script Extension. Consider the following characters:
|
Script Extension. Consider the following characters:
|
||||||
.sp
|
.sp
|
||||||
U+060C Arabic comma
|
U+060C Arabic comma
|
||||||
U+06D4 Arabic full stop
|
U+06D4 Arabic full stop
|
||||||
.sp
|
.sp
|
||||||
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
The first has the Script Extension list Arabic, Hanifi Rohingya, Syriac, and
|
||||||
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
Thaana; the second has just Arabic and Hanifi Rohingya. Both of them could
|
||||||
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
appear in script runs of either Arabic or Hanifi Rohingya. The first could also
|
||||||
appear in Syriac or Thaana script runs, but the second could not.
|
appear in Syriac or Thaana script runs, but the second could not.
|
||||||
|
@ -202,9 +202,9 @@ appear in Syriac or Thaana script runs, but the second could not.
|
||||||
.
|
.
|
||||||
.SS "The Chinese Han script"
|
.SS "The Chinese Han script"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The Chinese Han script is commonly used in conjunction with other scripts for
|
The Chinese Han script is commonly used in conjunction with other scripts for
|
||||||
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
writing certain languages. Japanese uses the Hiragana and Katakana scripts
|
||||||
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
together with Han; Korean uses Hangul and Han; Taiwanese Mandarin uses Bopomofo
|
||||||
and Han. These three combinations are treated as special cases when checking
|
and Han. These three combinations are treated as special cases when checking
|
||||||
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
script runs and are, in effect, "virtual scripts". Thus, a script run may
|
||||||
|
|
|
@ -29,7 +29,7 @@ if [ $# -gt 1 -a "$1" = "-perl" ] ; then
|
||||||
shift
|
shift
|
||||||
perl=$1
|
perl=$1
|
||||||
shift
|
shift
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $# -gt 0 -a "$1" = "-w" ] ; then
|
if [ $# -gt 0 -a "$1" = "-w" ] ; then
|
||||||
perlarg="-w"
|
perlarg="-w"
|
||||||
|
@ -386,10 +386,10 @@ for (;;)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode
|
# By closing OUTFILE explicitly, we avoid a Perl warning in -w mode
|
||||||
# "main::OUTFILE" used only once".
|
# "main::OUTFILE" used only once".
|
||||||
|
|
||||||
close(OUTFILE) if $outfile eq "OUTFILE";
|
close(OUTFILE) if $outfile eq "OUTFILE";
|
||||||
|
|
||||||
PERLEND
|
PERLEND
|
||||||
) | $perl $perlarg - $@
|
) | $perl $perlarg - $@
|
||||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 33
|
#define PCRE2_MINOR 33
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2018-09-14
|
#define PCRE2_DATE 2019-03-03
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
@ -150,6 +150,7 @@ D is inspected during pcre2_dfa_match() execution
|
||||||
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
|
||||||
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
|
||||||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||||||
|
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||||||
|
|
||||||
/* These are for pcre2_jit_compile(). */
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
|
|
|
@ -604,15 +604,15 @@ for(;;)
|
||||||
case OP_SCBRAPOS:
|
case OP_SCBRAPOS:
|
||||||
if (cb->had_recurse) return FALSE;
|
if (cb->had_recurse) return FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* A script run might have to backtrack if the iterated item can match
|
/* A script run might have to backtrack if the iterated item can match
|
||||||
characters from more than one script. So give up unless repeating an
|
characters from more than one script. So give up unless repeating an
|
||||||
explicit character. */
|
explicit character. */
|
||||||
|
|
||||||
case OP_SCRIPT_RUN:
|
case OP_SCRIPT_RUN:
|
||||||
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Atomic sub-patterns and assertions can always auto-possessify their
|
/* Atomic sub-patterns and assertions can always auto-possessify their
|
||||||
last iterator. However, if the group was entered as a result of checking
|
last iterator. However, if the group was entered as a result of checking
|
||||||
|
|
|
@ -407,7 +407,7 @@ return 0;
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
pcre2_set_substitute_callout(pcre2_match_context *mcontext,
|
||||||
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
int (*substitute_callout)(pcre2_substitute_callout_block *, void *),
|
||||||
void *substitute_callout_data)
|
void *substitute_callout_data)
|
||||||
{
|
{
|
||||||
mcontext->substitute_callout = substitute_callout;
|
mcontext->substitute_callout = substitute_callout;
|
||||||
|
|
|
@ -182,8 +182,8 @@ static const unsigned char compile_error_texts[] =
|
||||||
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
"\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
|
||||||
"invalid hyphen in option setting\0"
|
"invalid hyphen in option setting\0"
|
||||||
/* 95 */
|
/* 95 */
|
||||||
"(*alpha_assertion) not recognized\0"
|
"(*alpha_assertion) not recognized\0"
|
||||||
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
"script runs require Unicode support, which this version of PCRE2 does not have\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -525,10 +525,10 @@ bytes in a code unit in that mode. */
|
||||||
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||||
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||||
|
|
||||||
/* Values for the flags field in a match data block. */
|
/* Values for the flags field in a match data block. */
|
||||||
|
|
||||||
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
#define PCRE2_MD_COPIED_SUBJECT 0x01u
|
||||||
|
|
||||||
/* Magic number to provide a small check against being handed junk. */
|
/* Magic number to provide a small check against being handed junk. */
|
||||||
|
|
||||||
|
@ -1774,7 +1774,7 @@ typedef struct {
|
||||||
uint8_t caseset; /* offset to multichar other cases or zero */
|
uint8_t caseset; /* offset to multichar other cases or zero */
|
||||||
int32_t other_case; /* offset to other case, or zero if none */
|
int32_t other_case; /* offset to other case, or zero if none */
|
||||||
int16_t scriptx; /* script extension value */
|
int16_t scriptx; /* script extension value */
|
||||||
int16_t dummy; /* spare - to round to multiple of 4 bytes */
|
int16_t dummy; /* spare - to round to multiple of 4 bytes */
|
||||||
} ucd_record;
|
} ucd_record;
|
||||||
|
|
||||||
/* UCD access macros */
|
/* UCD access macros */
|
||||||
|
|
|
@ -7794,12 +7794,12 @@ if (needstype || needsscript)
|
||||||
|
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
|
|
||||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
|
||||||
|
|
||||||
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0);
|
||||||
|
|
||||||
// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
|
// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
|
||||||
|
|
||||||
ccbegin = cc;
|
ccbegin = cc;
|
||||||
|
@ -7848,7 +7848,7 @@ if (needstype || needsscript)
|
||||||
//fprintf(stderr, "~~C\n");
|
//fprintf(stderr, "~~C\n");
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0);
|
||||||
|
|
||||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||||
|
@ -7862,12 +7862,12 @@ if (needstype || needsscript)
|
||||||
// PH hacking
|
// PH hacking
|
||||||
//fprintf(stderr, "~~D\n");
|
//fprintf(stderr, "~~D\n");
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||||
|
|
||||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
|
||||||
|
|
||||||
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
|
||||||
typereg = RETURN_ADDR;
|
typereg = RETURN_ADDR;
|
||||||
}
|
}
|
||||||
|
@ -9207,9 +9207,9 @@ if (common->utf && *cc == OP_REFI)
|
||||||
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
|
||||||
|
|
||||||
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
|
||||||
|
|
||||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
|
||||||
|
|
||||||
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
|
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
|
||||||
|
|
||||||
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
|
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
|
||||||
|
|
|
@ -138,7 +138,7 @@ for (i = 0; i < 256; i++)
|
||||||
int x = 0;
|
int x = 0;
|
||||||
if (isspace(i)) x += ctype_space;
|
if (isspace(i)) x += ctype_space;
|
||||||
if (isalpha(i)) x += ctype_letter;
|
if (isalpha(i)) x += ctype_letter;
|
||||||
if (islower(i)) x += ctype_lcletter;
|
if (islower(i)) x += ctype_lcletter;
|
||||||
if (isdigit(i)) x += ctype_digit;
|
if (isdigit(i)) x += ctype_digit;
|
||||||
if (isalnum(i) || i == '_') x += ctype_word;
|
if (isalnum(i) || i == '_') x += ctype_word;
|
||||||
*p++ = x;
|
*p++ = x;
|
||||||
|
|
|
@ -96,10 +96,10 @@ pcre2_match_data_free(pcre2_match_data *match_data)
|
||||||
if (match_data != NULL)
|
if (match_data != NULL)
|
||||||
{
|
{
|
||||||
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
|
||||||
match_data->memctl.free((void *)match_data->subject,
|
match_data->memctl.free((void *)match_data->subject,
|
||||||
match_data->memctl.memory_data);
|
match_data->memctl.memory_data);
|
||||||
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
match_data->memctl.free(match_data, match_data->memctl.memory_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -393,7 +393,7 @@ for(;;)
|
||||||
case OP_ASSERTBACK:
|
case OP_ASSERTBACK:
|
||||||
case OP_ASSERTBACK_NOT:
|
case OP_ASSERTBACK_NOT:
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_SCRIPT_RUN:
|
case OP_SCRIPT_RUN:
|
||||||
case OP_COND:
|
case OP_COND:
|
||||||
case OP_SCOND:
|
case OP_SCOND:
|
||||||
case OP_REVERSE:
|
case OP_REVERSE:
|
||||||
|
|
|
@ -171,7 +171,7 @@ for (;;)
|
||||||
/* Fall through */
|
/* Fall through */
|
||||||
|
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_SCRIPT_RUN:
|
case OP_SCRIPT_RUN:
|
||||||
case OP_SBRA:
|
case OP_SBRA:
|
||||||
case OP_BRAPOS:
|
case OP_BRAPOS:
|
||||||
case OP_SBRAPOS:
|
case OP_SBRAPOS:
|
||||||
|
@ -1076,7 +1076,7 @@ do
|
||||||
case OP_CBRAPOS:
|
case OP_CBRAPOS:
|
||||||
case OP_SCBRAPOS:
|
case OP_SCBRAPOS:
|
||||||
case OP_ONCE:
|
case OP_ONCE:
|
||||||
case OP_SCRIPT_RUN:
|
case OP_SCRIPT_RUN:
|
||||||
case OP_ASSERT:
|
case OP_ASSERT:
|
||||||
rc = set_start_bits(re, tcode, utf);
|
rc = set_start_bits(re, tcode, utf);
|
||||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||||
|
|
|
@ -3,8 +3,8 @@
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||||||
and semantics are as close as possible to those of the Perl 5 language. This is
|
and semantics are as close as possible to those of the Perl 5 language. This is
|
||||||
the public header file to be #included by applications that call PCRE2 via the
|
the public header file to be #included by applications that call PCRE2 via the
|
||||||
POSIX wrapper interface.
|
POSIX wrapper interface.
|
||||||
|
|
||||||
Written by Philip Hazel
|
Written by Philip Hazel
|
||||||
|
@ -138,7 +138,7 @@ file. */
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The functions. The actual code is in functions with pcre2_xxx names for
|
/* The functions. The actual code is in functions with pcre2_xxx names for
|
||||||
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
|
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
|
||||||
regex functions. It's done this way to ensure that they are always linked from
|
regex functions. It's done this way to ensure that they are always linked from
|
||||||
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
|
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
|
||||||
|
@ -155,7 +155,7 @@ PCRE2POSIX_EXP_DECL void pcre2_regfree(regex_t *);
|
||||||
#define regerror pcre2_regerror
|
#define regerror pcre2_regerror
|
||||||
#define regfree pcre2_regfree
|
#define regfree pcre2_regfree
|
||||||
|
|
||||||
/* Debian had a patch that used different names. These are now here to save
|
/* Debian had a patch that used different names. These are now here to save
|
||||||
them having to maintain their own patch, but are not documented by PCRE2. */
|
them having to maintain their own patch, but are not documented by PCRE2. */
|
||||||
|
|
||||||
#define PCRE2regcomp pcre2_regcomp
|
#define PCRE2regcomp pcre2_regcomp
|
||||||
|
|
Loading…
Reference in New Issue