File tidies for 10.23-RC1
This commit is contained in:
parent
31c62034fe
commit
ca1ecd41a1
6
AUTHORS
6
AUTHORS
|
@ -8,7 +8,7 @@ Email domain: cam.ac.uk
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2016 University of Cambridge
|
Copyright (c) 1997-2017 University of Cambridge
|
||||||
All rights reserved
|
All rights reserved
|
||||||
|
|
||||||
|
|
||||||
|
@ -19,7 +19,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,7 +30,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
####
|
####
|
||||||
|
|
50
ChangeLog
50
ChangeLog
|
@ -2,8 +2,8 @@ Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 xx-xxxxxx-2016
|
Version 10.23 16-January-2017
|
||||||
----------------------------
|
-----------------------------
|
||||||
|
|
||||||
1. Extended pcre2test with the utf8_input modifier so that it is able to
|
1. Extended pcre2test with the utf8_input modifier so that it is able to
|
||||||
generate all possible 16-bit and 32-bit code unit values in non-UTF modes.
|
generate all possible 16-bit and 32-bit code unit values in non-UTF modes.
|
||||||
|
@ -37,12 +37,12 @@ some minor bugs and Perl incompatibilities were fixed, including:
|
||||||
(f) When testing zero-terminated patterns under valgrind, the terminating
|
(f) When testing zero-terminated patterns under valgrind, the terminating
|
||||||
zero is now marked "no access". This catches bugs that would otherwise
|
zero is now marked "no access". This catches bugs that would otherwise
|
||||||
show up only with non-zero-terminated patterns.
|
show up only with non-zero-terminated patterns.
|
||||||
|
|
||||||
(g) A hyphen appearing immediately after a POSIX character class (for example
|
(g) A hyphen appearing immediately after a POSIX character class (for example
|
||||||
/[[:ascii:]-z]/) now generates an error. Perl does accept this as a
|
/[[:ascii:]-z]/) now generates an error. Perl does accept this as a
|
||||||
literal, but gives a warning, so it seems best to fail it in PCRE.
|
literal, but gives a warning, so it seems best to fail it in PCRE.
|
||||||
|
|
||||||
(h) An empty \Q\E sequence may appear after a callout that precedes an
|
(h) An empty \Q\E sequence may appear after a callout that precedes an
|
||||||
assertion condition (it is, of course, ignored).
|
assertion condition (it is, of course, ignored).
|
||||||
|
|
||||||
One effect of the refactoring is that some error numbers and messages have
|
One effect of the refactoring is that some error numbers and messages have
|
||||||
|
@ -125,9 +125,9 @@ fully released code, but are noted here for the record.
|
||||||
(r) If a character whose code point was greater than 0xffff appeared within
|
(r) If a character whose code point was greater than 0xffff appeared within
|
||||||
a lookbehind that was within another lookbehind, the calculation of the
|
a lookbehind that was within another lookbehind, the calculation of the
|
||||||
lookbehind length went wrong and could provoke an internal error.
|
lookbehind length went wrong and could provoke an internal error.
|
||||||
|
|
||||||
(t) The sequence \E- or \Q\E- after a POSIX class in a character class caused
|
(t) The sequence \E- or \Q\E- after a POSIX class in a character class caused
|
||||||
an internal error. Now the hyphen is treated as a literal.
|
an internal error. Now the hyphen is treated as a literal.
|
||||||
|
|
||||||
4. Back references are now permitted in lookbehind assertions when there are
|
4. Back references are now permitted in lookbehind assertions when there are
|
||||||
no duplicated group numbers (that is, (?| has not been used), and, if the
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
|
@ -269,14 +269,14 @@ auto_callout".
|
||||||
pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide
|
pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide
|
||||||
characters to match (for example, /[\s[:^ascii:]]/).
|
characters to match (for example, /[\s[:^ascii:]]/).
|
||||||
|
|
||||||
40. The callout_error modifier has been added to pcre2test to make it possible
|
40. The callout_error modifier has been added to pcre2test to make it possible
|
||||||
to return PCRE2_ERROR_CALLOUT from a callout.
|
to return PCRE2_ERROR_CALLOUT from a callout.
|
||||||
|
|
||||||
41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of
|
41. A minor change to pcre2grep: colour reset is now "<esc>[0m" instead of
|
||||||
"<esc>[00m".
|
"<esc>[00m".
|
||||||
|
|
||||||
42. The limit in the auto-possessification code that was intended to catch
|
42. The limit in the auto-possessification code that was intended to catch
|
||||||
overly-complicated patterns and not spend too much time auto-possessifying was
|
overly-complicated patterns and not spend too much time auto-possessifying was
|
||||||
being reset too often, resulting in very long compile times for some patterns.
|
being reset too often, resulting in very long compile times for some patterns.
|
||||||
Now such patterns are no longer completely auto-possessified.
|
Now such patterns are no longer completely auto-possessified.
|
||||||
|
|
||||||
|
@ -284,10 +284,10 @@ Now such patterns are no longer completely auto-possessified.
|
||||||
|
|
||||||
44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood.
|
44. Added a new Windows script RunGrepTest.bat, courtesy of Jason Hood.
|
||||||
|
|
||||||
45. Minor cosmetic fix to pcre2test: move a variable that is not used under
|
45. Minor cosmetic fix to pcre2test: move a variable that is not used under
|
||||||
Windows into the "not Windows" code.
|
Windows into the "not Windows" code.
|
||||||
|
|
||||||
46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy
|
46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy
|
||||||
some of the code:
|
some of the code:
|
||||||
|
|
||||||
* normalised the Windows condition by ensuring WIN32 is defined;
|
* normalised the Windows condition by ensuring WIN32 is defined;
|
||||||
|
@ -306,27 +306,27 @@ some of the code:
|
||||||
* fixes a typo.
|
* fixes a typo.
|
||||||
|
|
||||||
In addition, colour settings containing anything other than digits and
|
In addition, colour settings containing anything other than digits and
|
||||||
semicolon are ignored, and the colour controls are no longer output for empty
|
semicolon are ignored, and the colour controls are no longer output for empty
|
||||||
strings.
|
strings.
|
||||||
|
|
||||||
47. Detecting patterns that are too large inside the length-measuring loop
|
47. Detecting patterns that are too large inside the length-measuring loop
|
||||||
saves processing ridiculously long patterns to their end.
|
saves processing ridiculously long patterns to their end.
|
||||||
|
|
||||||
48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it
|
48. Ignore PCRE2_CASELESS when processing \h, \H, \v, and \V in classes as it
|
||||||
just wastes time. In the UTF case it can also produce redundant entries in
|
just wastes time. In the UTF case it can also produce redundant entries in
|
||||||
XCLASS lists caused by characters with multiple other cases and pairs of
|
XCLASS lists caused by characters with multiple other cases and pairs of
|
||||||
characters in the same "not-x" sublists.
|
characters in the same "not-x" sublists.
|
||||||
|
|
||||||
49. A pattern such as /(?=(a\K))/ can report the end of the match being before
|
49. A pattern such as /(?=(a\K))/ can report the end of the match being before
|
||||||
its start; pcre2test was not handling this correctly when using the POSIX
|
its start; pcre2test was not handling this correctly when using the POSIX
|
||||||
interface (it was OK with the native interface).
|
interface (it was OK with the native interface).
|
||||||
|
|
||||||
50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will
|
50. In pcre2grep, ignore all JIT compile errors. This means that pcre2grep will
|
||||||
continue to work, falling back to interpretation if anything goes wrong with
|
continue to work, falling back to interpretation if anything goes wrong with
|
||||||
JIT.
|
JIT.
|
||||||
|
|
||||||
51. Applied patches from Christian Persch to configure.ac to make use of the
|
51. Applied patches from Christian Persch to configure.ac to make use of the
|
||||||
AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT
|
AC_USE_SYSTEM_EXTENSIONS macro and to test for functions used by the JIT
|
||||||
modules.
|
modules.
|
||||||
|
|
||||||
52. Minor fixes to pcre2grep from Jason Hood:
|
52. Minor fixes to pcre2grep from Jason Hood:
|
||||||
|
@ -335,7 +335,7 @@ modules.
|
||||||
to use appropriate quotes [in an example];
|
to use appropriate quotes [in an example];
|
||||||
* LC_ALL was displayed as "LCC_ALL";
|
* LC_ALL was displayed as "LCC_ALL";
|
||||||
* numbers 11, 12 & 13 should end in "th".
|
* numbers 11, 12 & 13 should end in "th".
|
||||||
* use double quotes in usage message
|
* use double quotes in usage message
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
|
|
6
LICENCE
6
LICENCE
|
@ -25,7 +25,7 @@ Email domain: cam.ac.uk
|
||||||
University of Cambridge Computing Service,
|
University of Cambridge Computing Service,
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
|
|
||||||
Copyright (c) 1997-2016 University of Cambridge
|
Copyright (c) 1997-2017 University of Cambridge
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2010-2016 Zoltan Herczeg
|
Copyright(c) 2010-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ Written by: Zoltan Herczeg
|
||||||
Email local part: hzmester
|
Email local part: hzmester
|
||||||
Email domain: freemail.hu
|
Email domain: freemail.hu
|
||||||
|
|
||||||
Copyright(c) 2009-2016 Zoltan Herczeg
|
Copyright(c) 2009-2017 Zoltan Herczeg
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,10 @@
|
||||||
AUTOMAKE_OPTIONS = subdir-objects
|
AUTOMAKE_OPTIONS = subdir-objects
|
||||||
ACLOCAL_AMFLAGS = -I m4
|
ACLOCAL_AMFLAGS = -I m4
|
||||||
|
|
||||||
|
## This seems to have become necessary for building in non-source directory.
|
||||||
|
|
||||||
|
AM_CPPFLAGS="-I$(srcdir)/src"
|
||||||
|
|
||||||
## Specify the documentation files that are distributed.
|
## Specify the documentation files that are distributed.
|
||||||
|
|
||||||
dist_doc_DATA = \
|
dist_doc_DATA = \
|
||||||
|
|
36
NEWS
36
NEWS
|
@ -1,6 +1,42 @@
|
||||||
News about PCRE2 releases
|
News about PCRE2 releases
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
|
Version 10.23 16-January-2017
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
1. ChangeLog has the details of a lot of bug fixes and tidies.
|
||||||
|
|
||||||
|
2. There has been a major re-factoring of the pcre2_compile.c file. Most syntax
|
||||||
|
checking is now done in the pre-pass that identifies capturing groups. This has
|
||||||
|
reduced the amount of duplication and made the code tidier. While doing this,
|
||||||
|
some minor bugs and Perl incompatibilities were fixed (see ChangeLog for
|
||||||
|
details).
|
||||||
|
|
||||||
|
3. Back references are now permitted in lookbehind assertions when there are
|
||||||
|
no duplicated group numbers (that is, (?| has not been used), and, if the
|
||||||
|
reference is by name, there is only one group of that name. The referenced
|
||||||
|
group must, of course, be of fixed length.
|
||||||
|
|
||||||
|
4. \g{+<number>} (e.g. \g{+2} ) is now supported. It is a "forward back
|
||||||
|
reference" and can be useful in repetitions (compare \g{-<number>} ). Perl does
|
||||||
|
not recognize this syntax.
|
||||||
|
|
||||||
|
5. pcre2grep now automatically expands its buffer up to a maximum set by
|
||||||
|
--max-buffer-size.
|
||||||
|
|
||||||
|
6. The -t option (grand total) has been added to pcre2grep.
|
||||||
|
|
||||||
|
7. A new function called pcre2_code_copy_with_tables() exists to copy a
|
||||||
|
compiled pattern along with a private copy of the character tables that it
|
||||||
|
uses.
|
||||||
|
|
||||||
|
8. A user supplied a number of patches to upgrade pcre2grep under Windows and
|
||||||
|
tidy the code.
|
||||||
|
|
||||||
|
9. Several updates have been made to pcre2test and test scripts (see
|
||||||
|
ChangeLog).
|
||||||
|
|
||||||
|
|
||||||
Version 10.22 29-July-2016
|
Version 10.22 29-July-2016
|
||||||
--------------------------
|
--------------------------
|
||||||
|
|
||||||
|
|
40
README
40
README
|
@ -337,18 +337,18 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
|
|
||||||
The value must be a plain integer. The default is 20480. The amount of memory
|
The value must be a plain integer. The default is 20480. The amount of memory
|
||||||
used by pcre2grep is actually three times this number, to allow for "before"
|
used by pcre2grep is actually three times this number, to allow for "before"
|
||||||
and "after" lines. If very long lines are encountered, the buffer is
|
and "after" lines. If very long lines are encountered, the buffer is
|
||||||
automatically enlarged, up to a fixed maximum size.
|
automatically enlarged, up to a fixed maximum size.
|
||||||
|
|
||||||
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||||
example:
|
example:
|
||||||
|
|
||||||
--with-pcre2grep-max-bufsize=2097152
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
|
|
||||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||||
whichever is the larger.
|
whichever is the larger.
|
||||||
|
|
||||||
. It is possible to compile pcre2test so that it links with the libreadline
|
. It is possible to compile pcre2test so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
|
@ -372,21 +372,21 @@ library. They are also documented in the pcre2build man page.
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
|
||||||
. There is a special option called --enable-fuzz-support for use by people who
|
. There is a special option called --enable-fuzz-support for use by people who
|
||||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||||
be built, but not installed. This contains a single function called
|
be built, but not installed. This contains a single function called
|
||||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||||
length of the string. When called, this function tries to compile the string
|
length of the string. When called, this function tries to compile the string
|
||||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||||
options and with some random options bits that are generated from the string.
|
options and with some random options bits that are generated from the string.
|
||||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||||
be created. This is normally run under valgrind or used when PCRE2 is
|
be created. This is normally run under valgrind or used when PCRE2 is
|
||||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||||
outputs information about what it is doing. The input strings are specified by
|
outputs information about what it is doing. The input strings are specified by
|
||||||
arguments: if an argument starts with "=" the rest of it is a literal input
|
arguments: if an argument starts with "=" the rest of it is a literal input
|
||||||
string. Otherwise, it is assumed to be a file name, and the contents of the
|
string. Otherwise, it is assumed to be a file name, and the contents of the
|
||||||
file are the test string.
|
file are the test string.
|
||||||
|
|
||||||
The "configure" script builds the following files for the basic C library:
|
The "configure" script builds the following files for the basic C library:
|
||||||
|
@ -777,7 +777,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_xclass.c )
|
src/pcre2_xclass.c )
|
||||||
|
|
||||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||||
|
|
||||||
src/config.h.in template for config.h, when built by "configure"
|
src/config.h.in template for config.h, when built by "configure"
|
||||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||||
|
|
2
RunTest
2
RunTest
|
@ -520,7 +520,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
echo $title2 "(excluding UTF-$bits) (64M stack)"
|
echo $title2 "(excluding UTF-$bits) (64M stack)"
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q -S 64 $bmode $opt $testdata/testinput2 testtry
|
||||||
if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189 >>testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||||
checkresult $? 2 "$opt"
|
checkresult $? 2 "$opt"
|
||||||
else
|
else
|
||||||
echo " "
|
echo " "
|
||||||
|
|
16
configure.ac
16
configure.ac
|
@ -11,16 +11,16 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [23])
|
m4_define(pcre2_minor, [23])
|
||||||
m4_define(pcre2_prerelease, [-RC1])
|
m4_define(pcre2_prerelease, [-RC1])
|
||||||
m4_define(pcre2_date, [2016-08-01])
|
m4_define(pcre2_date, [2017-01-16])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
||||||
# Libtool shared library interface versions (current:revision:age)
|
# Libtool shared library interface versions (current:revision:age)
|
||||||
m4_define(libpcre2_8_version, [4:0:4])
|
m4_define(libpcre2_8_version, [5:0:5])
|
||||||
m4_define(libpcre2_16_version, [4:0:4])
|
m4_define(libpcre2_16_version, [5:0:5])
|
||||||
m4_define(libpcre2_32_version, [4:0:4])
|
m4_define(libpcre2_32_version, [5:0:5])
|
||||||
m4_define(libpcre2_posix_version, [1:0:0])
|
m4_define(libpcre2_posix_version, [1:1:0])
|
||||||
|
|
||||||
AC_PREREQ(2.57)
|
AC_PREREQ(2.57)
|
||||||
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
AC_INIT(PCRE2, pcre2_major.pcre2_minor[]pcre2_prerelease, , pcre2)
|
||||||
|
@ -630,7 +630,7 @@ fi
|
||||||
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
AC_DEFINE_UNQUOTED([PCRE2GREP_BUFSIZE], [$with_pcre2grep_bufsize], [
|
||||||
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
The value of PCRE2GREP_BUFSIZE is the starting size of the buffer used by
|
||||||
pcre2grep to hold parts of the file it is searching. The buffer will be
|
pcre2grep to hold parts of the file it is searching. The buffer will be
|
||||||
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
expanded up to PCRE2GREP_MAX_BUFSIZE if necessary, for files containing very
|
||||||
long lines. The actual amount of memory used by pcre2grep is three times this
|
long lines. The actual amount of memory used by pcre2grep is three times this
|
||||||
number, because it allows for the buffering of "before" and "after" lines.])
|
number, because it allows for the buffering of "before" and "after" lines.])
|
||||||
|
|
||||||
|
@ -936,7 +936,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
C compiler flags ................... : ${CFLAGS} ${VISIBILITY_CFLAGS}
|
||||||
Linker flags ....................... : ${LDFLAGS}
|
Linker flags ....................... : ${LDFLAGS}
|
||||||
Extra libraries .................... : ${LIBS}
|
Extra libraries .................... : ${LIBS}
|
||||||
|
|
||||||
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
Build 8-bit pcre2 library .......... : ${enable_pcre2_8}
|
||||||
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
Build 16-bit pcre2 library ......... : ${enable_pcre2_16}
|
||||||
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
Build 32-bit pcre2 library ......... : ${enable_pcre2_32}
|
||||||
|
@ -966,7 +966,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
Link pcre2test with libreadline .... : ${enable_pcre2test_libreadline}
|
||||||
Valgrind support ................... : ${enable_valgrind}
|
Valgrind support ................... : ${enable_valgrind}
|
||||||
Code coverage ...................... : ${enable_coverage}
|
Code coverage ...................... : ${enable_coverage}
|
||||||
Fuzzer support ..................... : ${enable_fuzz_support}
|
Fuzzer support ..................... : ${enable_fuzz_support}
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
|
|
|
@ -337,18 +337,18 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
|
|
||||||
The value must be a plain integer. The default is 20480. The amount of memory
|
The value must be a plain integer. The default is 20480. The amount of memory
|
||||||
used by pcre2grep is actually three times this number, to allow for "before"
|
used by pcre2grep is actually three times this number, to allow for "before"
|
||||||
and "after" lines. If very long lines are encountered, the buffer is
|
and "after" lines. If very long lines are encountered, the buffer is
|
||||||
automatically enlarged, up to a fixed maximum size.
|
automatically enlarged, up to a fixed maximum size.
|
||||||
|
|
||||||
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
. The default maximum size of pcre2grep's internal buffer can be set by, for
|
||||||
example:
|
example:
|
||||||
|
|
||||||
--with-pcre2grep-max-bufsize=2097152
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
|
|
||||||
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
The default is either 1048576 or the value of --with-pcre2grep-bufsize,
|
||||||
whichever is the larger.
|
whichever is the larger.
|
||||||
|
|
||||||
. It is possible to compile pcre2test so that it links with the libreadline
|
. It is possible to compile pcre2test so that it links with the libreadline
|
||||||
or libedit libraries, by specifying, respectively,
|
or libedit libraries, by specifying, respectively,
|
||||||
|
@ -372,21 +372,21 @@ library. They are also documented in the pcre2build man page.
|
||||||
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
If you get error messages about missing functions tgetstr, tgetent, tputs,
|
||||||
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
tgetflag, or tgoto, this is the problem, and linking with the ncurses library
|
||||||
should fix it.
|
should fix it.
|
||||||
|
|
||||||
. There is a special option called --enable-fuzz-support for use by people who
|
. There is a special option called --enable-fuzz-support for use by people who
|
||||||
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
want to run fuzzing tests on PCRE2. At present this applies only to the 8-bit
|
||||||
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
library. If set, it causes an extra library called libpcre2-fuzzsupport.a to
|
||||||
be built, but not installed. This contains a single function called
|
be built, but not installed. This contains a single function called
|
||||||
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
LLVMFuzzerTestOneInput() whose arguments are a pointer to a string and the
|
||||||
length of the string. When called, this function tries to compile the string
|
length of the string. When called, this function tries to compile the string
|
||||||
as a pattern, and if that succeeds, to match it. This is done both with no
|
as a pattern, and if that succeeds, to match it. This is done both with no
|
||||||
options and with some random options bits that are generated from the string.
|
options and with some random options bits that are generated from the string.
|
||||||
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
Setting --enable-fuzz-support also causes a binary called pcre2fuzzcheck to
|
||||||
be created. This is normally run under valgrind or used when PCRE2 is
|
be created. This is normally run under valgrind or used when PCRE2 is
|
||||||
compiled with address sanitizing enabled. It calls the fuzzing function and
|
compiled with address sanitizing enabled. It calls the fuzzing function and
|
||||||
outputs information about what it is doing. The input strings are specified by
|
outputs information about what it is doing. The input strings are specified by
|
||||||
arguments: if an argument starts with "=" the rest of it is a literal input
|
arguments: if an argument starts with "=" the rest of it is a literal input
|
||||||
string. Otherwise, it is assumed to be a file name, and the contents of the
|
string. Otherwise, it is assumed to be a file name, and the contents of the
|
||||||
file are the test string.
|
file are the test string.
|
||||||
|
|
||||||
The "configure" script builds the following files for the basic C library:
|
The "configure" script builds the following files for the basic C library:
|
||||||
|
@ -777,7 +777,7 @@ The distribution should contain the files listed below.
|
||||||
src/pcre2_xclass.c )
|
src/pcre2_xclass.c )
|
||||||
|
|
||||||
src/pcre2_printint.c debugging function that is used by pcre2test,
|
src/pcre2_printint.c debugging function that is used by pcre2test,
|
||||||
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
src/pcre2_fuzzsupport.c function for (optional) fuzzing support
|
||||||
|
|
||||||
src/config.h.in template for config.h, when built by "configure"
|
src/config.h.in template for config.h, when built by "configure"
|
||||||
src/pcre2.h.in template for pcre2.h when built by "configure"
|
src/pcre2.h.in template for pcre2.h when built by "configure"
|
||||||
|
|
|
@ -27,7 +27,7 @@ DESCRIPTION
|
||||||
<P>
|
<P>
|
||||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
any memory used by the JIT compiler. Without a subsequent call to
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching.
|
<b>pcre2_jit_compile()</b>, the copy can be used only for non-JIT matching.
|
||||||
Unlike <b>pcre2_code_copy()</b>, a separate copy of the character tables is also
|
Unlike <b>pcre2_code_copy()</b>, a separate copy of the character tables is also
|
||||||
made, with the new code pointing to it. This memory will be automatically freed
|
made, with the new code pointing to it. This memory will be automatically freed
|
||||||
when <b>pcre2_code_free()</b> is called. The yield of the function is NULL if
|
when <b>pcre2_code_free()</b> is called. The yield of the function is NULL if
|
||||||
|
|
|
@ -45,8 +45,8 @@ is <b>pcre2_match()</b>.) The arguments for this function are:
|
||||||
<i>wscount</i> Number of elements in the vector
|
<i>wscount</i> Number of elements in the vector
|
||||||
</pre>
|
</pre>
|
||||||
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
For <b>pcre2_dfa_match()</b>, a match context is needed only if you want to set
|
||||||
up a callout function. The <i>length</i> and <i>startoffset</i> values are code
|
up a callout function or specify the recursion limit. The <i>length</i> and
|
||||||
units, not characters. The options are:
|
<i>startoffset</i> values are code units, not characters. The options are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ANCHORED Match only at the first position
|
PCRE2_ANCHORED Match only at the first position
|
||||||
PCRE2_NOTBOL Subject is not the beginning of a line
|
PCRE2_NOTBOL Subject is not the beginning of a line
|
||||||
|
|
|
@ -570,7 +570,7 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
||||||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||||
required. JIT compilation updates a pointer within the compiled code block, so
|
required. JIT compilation updates a pointer within the compiled code block, so
|
||||||
a thread must gain unique write access to the pointer before calling
|
a thread must gain unique write access to the pointer before calling
|
||||||
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
||||||
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
||||||
compiled code.
|
compiled code.
|
||||||
</P>
|
</P>
|
||||||
|
@ -740,7 +740,7 @@ functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
|
||||||
<br>
|
<br>
|
||||||
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||||
using up too much system stack when being compiled. The limit applies to
|
using up too much system stack when being compiled. The limit applies to
|
||||||
parentheses of all kinds, not just capturing parentheses.
|
parentheses of all kinds, not just capturing parentheses.
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
|
@ -879,21 +879,23 @@ This limit is of use only if it is set smaller than <i>match_limit</i>.
|
||||||
Limiting the recursion depth limits the amount of system stack that can be
|
Limiting the recursion depth limits the amount of system stack that can be
|
||||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||||
and is ignored, when matching is done using JIT compiled code or by the
|
and is ignored, when matching is done using JIT compiled code. However, it is
|
||||||
<b>pcre2_dfa_match()</b> function.
|
supported by <b>pcre2_dfa_match()</b>, which uses recursive function calls less
|
||||||
|
frequently than <b>pcre2_match()</b>, but which can be caused to use a lot of
|
||||||
|
stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
|
The default value for <i>recursion_limit</i> can be set when PCRE2 is built; the
|
||||||
default default is the same value as the default for <i>match_limit</i>. If the
|
default default is the same value as the default for <i>match_limit</i>. If the
|
||||||
limit is exceeded, <b>pcre2_match()</b> returns PCRE2_ERROR_RECURSIONLIMIT. A
|
limit is exceeded, <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b> return
|
||||||
value for the recursion limit may also be supplied by an item at the start of a
|
PCRE2_ERROR_RECURSIONLIMIT. A value for the recursion limit may also be
|
||||||
pattern of the form
|
supplied by an item at the start of a pattern of the form
|
||||||
<pre>
|
<pre>
|
||||||
(*LIMIT_RECURSION=ddd)
|
(*LIMIT_RECURSION=ddd)
|
||||||
</pre>
|
</pre>
|
||||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
less than the limit set by the caller of <b>pcre2_match()</b> or
|
||||||
limit is set, less than the default.
|
<b>pcre2_dfa_match()</b> or, if no such limit is set, less than the default.
|
||||||
<b>int pcre2_set_recursion_memory_management(</b>
|
<b>int pcre2_set_recursion_memory_management(</b>
|
||||||
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
<b> pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||||
|
@ -1087,20 +1089,20 @@ if the code has been processed by the JIT compiler (see
|
||||||
<a href="#jitcompiling">below),</a>
|
<a href="#jitcompiling">below),</a>
|
||||||
the JIT information cannot be copied (because it is position-dependent).
|
the JIT information cannot be copied (because it is position-dependent).
|
||||||
The new copy can initially be used only for non-JIT matching, though it can be
|
The new copy can initially be used only for non-JIT matching, though it can be
|
||||||
passed to <b>pcre2_jit_compile()</b> if required.
|
passed to <b>pcre2_jit_compile()</b> if required.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_code_copy()</b> function provides a way for individual threads in a
|
The <b>pcre2_code_copy()</b> function provides a way for individual threads in a
|
||||||
multithreaded application to acquire a private copy of shared compiled code.
|
multithreaded application to acquire a private copy of shared compiled code.
|
||||||
However, it does not make a copy of the character tables used by the compiled
|
However, it does not make a copy of the character tables used by the compiled
|
||||||
pattern; the new pattern code points to the same tables as the original code.
|
pattern; the new pattern code points to the same tables as the original code.
|
||||||
(See
|
(See
|
||||||
<a href="#localesupport">"Locale Support"</a>
|
<a href="#localesupport">"Locale Support"</a>
|
||||||
below for details of these character tables.) In many applications the same
|
below for details of these character tables.) In many applications the same
|
||||||
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||||
there are occasions when a copy of a compiled pattern and the relevant tables
|
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||||
are needed. The <b>pcre2_code_copy_with_tables()</b> provides this facility.
|
are needed. The <b>pcre2_code_copy_with_tables()</b> provides this facility.
|
||||||
Copies of both the code and the tables are made, with the new code pointing to
|
Copies of both the code and the tables are made, with the new code pointing to
|
||||||
the new tables. The memory for the new tables is automatically freed when
|
the new tables. The memory for the new tables is automatically freed when
|
||||||
<b>pcre2_code_free()</b> is called for the new copy of the compiled code.
|
<b>pcre2_code_free()</b> is called for the new copy of the compiled code.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1140,7 +1142,7 @@ NULL immediately. Otherwise, the variables to which these point are set to an
|
||||||
error code and an offset (number of code units) within the pattern,
|
error code and an offset (number of code units) within the pattern,
|
||||||
respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
||||||
error has occurred. The values are not defined when compilation is successful
|
error has occurred. The values are not defined when compilation is successful
|
||||||
and <b>pcre2_compile()</b> returns a non-NULL value.
|
and <b>pcre2_compile()</b> returns a non-NULL value.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The value returned in <i>erroroffset</i> is an indication of where in the
|
The value returned in <i>erroroffset</i> is an indication of where in the
|
||||||
|
@ -1976,7 +1978,7 @@ Information about a successful or unsuccessful match is placed in a match
|
||||||
data block, which is an opaque structure that is accessed by function calls. In
|
data block, which is an opaque structure that is accessed by function calls. In
|
||||||
particular, the match data block contains a vector of offsets into the subject
|
particular, the match data block contains a vector of offsets into the subject
|
||||||
string that define the matched part of the subject and any substrings that were
|
string that define the matched part of the subject and any substrings that were
|
||||||
captured. This is know as the <i>ovector</i>.
|
captured. This is known as the <i>ovector</i>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
|
Before calling <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b>, or
|
||||||
|
@ -3263,7 +3265,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 22 November 2016
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -380,15 +380,15 @@ scanning, in order to be able to output "before" and "after" lines when it
|
||||||
finds a match. The starting size of the buffer is controlled by a parameter
|
finds a match. The starting size of the buffer is controlled by a parameter
|
||||||
whose default value is 20K. The buffer itself is three times this size, but
|
whose default value is 20K. The buffer itself is three times this size, but
|
||||||
because of the way it is used for holding "before" lines, the longest line that
|
because of the way it is used for holding "before" lines, the longest line that
|
||||||
is guaranteed to be processable is the parameter size. If a longer line is
|
is guaranteed to be processable is the parameter size. If a longer line is
|
||||||
encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
|
encountered, <b>pcre2grep</b> automatically expands the buffer, up to a
|
||||||
specified maximum size, whose default is 1M or the starting size, whichever is
|
specified maximum size, whose default is 1M or the starting size, whichever is
|
||||||
the larger. You can change the default parameter values by adding, for example,
|
the larger. You can change the default parameter values by adding, for example,
|
||||||
<pre>
|
<pre>
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
--with-pcre2grep-max-bufsize=2097152
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
|
to the <b>configure</b> command. The caller of <b>pcre2grep</b> can override
|
||||||
these values by using --buffer-size and --max-buffer-size on the command line.
|
these values by using --buffer-size and --max-buffer-size on the command line.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
<br><a name="SEC18" href="#TOC1">PCRE2TEST OPTION FOR LIBREADLINE SUPPORT</a><br>
|
||||||
|
|
|
@ -64,7 +64,7 @@ For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||||
</pre>
|
</pre>
|
||||||
it is processed as if it were
|
it is processed as if it were
|
||||||
<pre>
|
<pre>
|
||||||
(?C255)A(?C3)B(?C255)
|
(?C255)A(?C3)B(?C255)
|
||||||
</pre>
|
</pre>
|
||||||
Here is a more complicated example:
|
Here is a more complicated example:
|
||||||
<pre>
|
<pre>
|
||||||
|
|
|
@ -133,8 +133,8 @@ an error is given at compile time.
|
||||||
<P>
|
<P>
|
||||||
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
||||||
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
||||||
is set, Perl allowed white space between ( and ? though the latest Perls give
|
is set, Perl allowed white space between ( and ? though the latest Perls give
|
||||||
an error (for a while it was just deprecated). There may still be some cases
|
an error (for a while it was just deprecated). There may still be some cases
|
||||||
where Perl behaves differently.
|
where Perl behaves differently.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -159,7 +159,7 @@ list is with respect to Perl 5.10:
|
||||||
<br>
|
<br>
|
||||||
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
||||||
each alternative branch of a lookbehind assertion can match a different length
|
each alternative branch of a lookbehind assertion can match a different length
|
||||||
of string. Perl requires them all to have the same length.
|
of string. Perl requires them all to have the same length.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
||||||
|
|
|
@ -91,7 +91,7 @@ if a line is too long and the buffer can no longer be expanded.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The block of memory that is actually used is three times the "buffer size", to
|
The block of memory that is actually used is three times the "buffer size", to
|
||||||
allow for buffering "before" and "after" lines. If the buffer size is too
|
allow for buffering "before" and "after" lines. If the buffer size is too
|
||||||
small, fewer than requested "before" and "after" lines may be output.
|
small, fewer than requested "before" and "after" lines may be output.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -178,8 +178,8 @@ Treat binary files as text. This is equivalent to
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
|
||||||
Output up to <i>number</i> lines of context before each matching line. Fewer
|
Output up to <i>number</i> lines of context before each matching line. Fewer
|
||||||
lines are output if the previous match or the start of the file is within
|
lines are output if the previous match or the start of the file is within
|
||||||
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
<i>number</i> lines, or if the processing buffer size has been set too small. If
|
||||||
file names and/or line numbers are being output, a hyphen separator is used
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
instead of a colon for the context lines. A line containing "--" is output
|
instead of a colon for the context lines. A line containing "--" is output
|
||||||
|
@ -202,8 +202,8 @@ return code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--buffer-size=</b><i>number</i>
|
<b>--buffer-size=</b><i>number</i>
|
||||||
Set the parameter that controls how much memory is obtained at the start of
|
Set the parameter that controls how much memory is obtained at the start of
|
||||||
processing for buffering files that are being scanned. See also
|
processing for buffering files that are being scanned. See also
|
||||||
<b>--max-buffer-size</b> below.
|
<b>--max-buffer-size</b> below.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -222,7 +222,7 @@ suppressed lines than the count (that is, the number of matches).
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
If no lines are selected, the number zero is output. If several files are
|
If no lines are selected, the number zero is output. If several files are
|
||||||
being scanned, a count is output for each of them and the <b>-t</b> option can
|
being scanned, a count is output for each of them and the <b>-t</b> option can
|
||||||
be used to cause a total to be output at the end. However, if the
|
be used to cause a total to be output at the end. However, if the
|
||||||
<b>--files-with-matches</b> option is also used, only those files whose counts
|
<b>--files-with-matches</b> option is also used, only those files whose counts
|
||||||
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
|
||||||
|
@ -245,13 +245,23 @@ because <b>pcre2grep</b> has to search for all possible matches in a line, not
|
||||||
just one, in order to colour them all.
|
just one, in order to colour them all.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The colour that is used can be specified by setting the environment variable
|
The colour that is used can be specified by setting one of the environment
|
||||||
PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these are set,
|
variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
|
||||||
<b>pcre2grep</b> looks for GREP_COLOUR or GREP_COLOR. The value of the variable
|
PCREGREP_COLOR, which are checked in that order. If none of these are set,
|
||||||
should be a string of two numbers, separated by a semicolon. They are copied
|
<b>pcre2grep</b> looks for GREP_COLORS or GREP_COLOR (in that order). The value
|
||||||
directly into the control string for setting colour on a terminal, so it is
|
of the variable should be a string of two numbers, separated by a semicolon,
|
||||||
your responsibility to ensure that they make sense. If neither of the
|
except in the case of GREP_COLORS, which must start with "ms=" or "mt="
|
||||||
environment variables is set, the default is "1;31", which gives red.
|
followed by two semicolon-separated colours, terminated by the end of the
|
||||||
|
string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||||
|
ignored, and GREP_COLOR is checked.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
If the string obtained from one of the above variables contains any characters
|
||||||
|
other than semicolon or digits, the setting is ignored and the default colour
|
||||||
|
is used. The string is copied directly into the control string for setting
|
||||||
|
colour on a terminal, so it is your responsibility to ensure that the values
|
||||||
|
make sense. If no relevant environment variable is set, the default is "1;31",
|
||||||
|
which gives red.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
|
||||||
|
@ -353,7 +363,7 @@ tested before the file's patterns. However, no other pattern is taken from the
|
||||||
command line; all arguments are treated as the names of paths to be searched.
|
command line; all arguments are treated as the names of paths to be searched.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--file-list</b>=<i>filename</i>
|
<b>--file-list</b>=<i>filename</i>
|
||||||
Read a list of files and/or directories that are to be scanned from the given
|
Read a list of files and/or directories that are to be scanned from the given
|
||||||
file, one per line. Trailing white space is removed from each line, and blank
|
file, one per line. Trailing white space is removed from each line, and blank
|
||||||
lines are ignored. These paths are processed before any that are listed on the
|
lines are ignored. These paths are processed before any that are listed on the
|
||||||
|
@ -518,8 +528,8 @@ when the PCRE2 library is compiled, with the default default being 10 million.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--max-buffer-size=</b><i>number</i>
|
<b>--max-buffer-size=</b><i>number</i>
|
||||||
This limits the expansion of the processing buffer, whose initial size can be
|
This limits the expansion of the processing buffer, whose initial size can be
|
||||||
set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
|
set by <b>--buffer-size</b>. The maximum buffer size is silently forced to be no
|
||||||
smaller than the starting buffer size.
|
smaller than the starting buffer size.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -619,7 +629,7 @@ capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
match, nothing is output unless the file name or line number are being output.
|
match, nothing is output unless the file name or line number are being output.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
If this option is given multiple times, multiple substrings are output for each
|
If this option is given multiple times, multiple substrings are output for each
|
||||||
match, in the order the options are given, and all on one line. For example,
|
match, in the order the options are given, and all on one line. For example,
|
||||||
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||||
then 3 again to be output. By default, there is no separator (but see the next
|
then 3 again to be output. By default, there is no separator (but see the next
|
||||||
|
@ -693,7 +703,7 @@ specified by any of the <b>--include</b> or <b>--exclude</b> options.
|
||||||
<P>
|
<P>
|
||||||
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
|
||||||
Force the patterns to be anchored (each must start matching at the beginning of
|
Force the patterns to be anchored (each must start matching at the beginning of
|
||||||
a line) and in addition, require them to match entire lines. In multiline mode
|
a line) and in addition, require them to match entire lines. In multiline mode
|
||||||
the match may be more than one line. This is equivalent to having \A and \Z
|
the match may be more than one line. This is equivalent to having \A and \Z
|
||||||
characters at the start and end of each alternative top-level branch in every
|
characters at the start and end of each alternative top-level branch in every
|
||||||
pattern. This option applies only to the patterns that are matched against the
|
pattern. This option applies only to the patterns that are matched against the
|
||||||
|
@ -773,12 +783,12 @@ character. Otherwise <b>pcre2grep</b> will assume that it has no data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
<br><a name="SEC10" href="#TOC1">CALLING EXTERNAL SCRIPTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
On non-Windows systems, <b>pcre2grep</b> has, by default, support for calling
|
<b>pcre2grep</b> has, by default, support for calling external programs or
|
||||||
external programs or scripts during matching by making use of PCRE2's callout
|
scripts during matching by making use of PCRE2's callout facility. However,
|
||||||
facility. However, this support can be disabled when <b>pcre2grep</b> is built.
|
this support can be disabled when <b>pcre2grep</b> is built. You can find out
|
||||||
You can find out whether your binary has support for callouts by running it
|
whether your binary has support for callouts by running it with the <b>--help</b>
|
||||||
with the <b>--help</b> option. If the support is not enabled, all callouts in
|
option. If the support is not enabled, all callouts in patterns are ignored by
|
||||||
patterns are ignored by <b>pcre2grep</b>.
|
<b>pcre2grep</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
|
||||||
|
@ -860,7 +870,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 October 2016
|
Last updated: 31 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -62,8 +62,8 @@ There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
order to limit the amount of system stack used at compile time. The default
|
order to limit the amount of system stack used at compile time. The default
|
||||||
limit can be specified when PCRE2 is built; the default default is 250. An
|
limit can be specified when PCRE2 is built; the default default is 250. An
|
||||||
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
||||||
set the limit in a compile context.
|
set the limit in a compile context.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -76,7 +76,7 @@ is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||||
32-bit libraries.
|
32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The maximum length of a string argument to a callout is the largest number a
|
The maximum length of a string argument to a callout is the largest number a
|
||||||
32-bit unsigned integer can hold.
|
32-bit unsigned integer can hold.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
|
|
@ -190,6 +190,12 @@ be less than the value set (or defaulted) by the caller of <b>pcre2_match()</b>
|
||||||
for it to have any effect. In other words, the pattern writer can lower the
|
for it to have any effect. In other words, the pattern writer can lower the
|
||||||
limits set by the programmer, but not raise them. If there is more than one
|
limits set by the programmer, but not raise them. If there is more than one
|
||||||
setting of one of these limits, the lower value is used.
|
setting of one of these limits, the lower value is used.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The match limit is used (but in a different way) when JIT is being used, but it
|
||||||
|
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
||||||
|
However, the recursion limit is relevant for DFA matching, which does use some
|
||||||
|
function recursion, in particular, for recursions within the pattern.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Newline conventions
|
Newline conventions
|
||||||
|
@ -1347,10 +1353,10 @@ or immediately after a range. For example, [b-d-z] matches letters in the range
|
||||||
b to d, a hyphen character, or z.
|
b to d, a hyphen character, or z.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Perl treats a hyphen as a literal if it appears before a POSIX class (see
|
Perl treats a hyphen as a literal if it appears before or after a POSIX class
|
||||||
below) or a character type escape such as \d, but gives a warning in its
|
(see below) or a character type escape such as \d, but gives a warning in
|
||||||
warning mode, as this is most likely a user error. As PCRE2 has no facility for
|
its warning mode, as this is most likely a user error. As PCRE2 has no facility
|
||||||
warning, an error is given in these cases.
|
for warning, an error is given in these cases.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
It is not possible to have the literal character "]" as the end character of a
|
It is not possible to have the literal character "]" as the end character of a
|
||||||
|
@ -2087,7 +2093,7 @@ subpattern is possible using named parentheses (see below).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Another way of avoiding the ambiguity inherent in the use of digits following a
|
Another way of avoiding the ambiguity inherent in the use of digits following a
|
||||||
backslash is to use the \g escape sequence. This escape must be followed by a
|
backslash is to use the \g escape sequence. This escape must be followed by a
|
||||||
signed or unsigned number, optionally enclosed in braces. These examples are
|
signed or unsigned number, optionally enclosed in braces. These examples are
|
||||||
all identical:
|
all identical:
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -2108,8 +2114,8 @@ can be helpful in long patterns, and also in patterns that are created by
|
||||||
joining together fragments that contain references within themselves.
|
joining together fragments that contain references within themselves.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The sequence \g{+1} is a reference to the next capturing subpattern. This kind
|
The sequence \g{+1} is a reference to the next capturing subpattern. This kind
|
||||||
of forward reference can be useful in patterns that repeat. Perl does not
|
of forward reference can be useful in patterns that repeat. Perl does not
|
||||||
support the use of + in this way.
|
support the use of + in this way.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2212,11 +2218,11 @@ capturing is carried out only for positive assertions. (Perl sometimes, but not
|
||||||
always, does do capturing in negative assertions.)
|
always, does do capturing in negative assertions.)
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
WARNING: If a positive assertion containing one or more capturing subpatterns
|
WARNING: If a positive assertion containing one or more capturing subpatterns
|
||||||
succeeds, but failure to match later in the pattern causes backtracking over
|
succeeds, but failure to match later in the pattern causes backtracking over
|
||||||
this assertion, the captures within the assertion are reset only if no higher
|
this assertion, the captures within the assertion are reset only if no higher
|
||||||
numbered captures are already set. This is, unfortunately, a fundamental
|
numbered captures are already set. This is, unfortunately, a fundamental
|
||||||
limitation of the current implementation; it may get removed in a future
|
limitation of the current implementation; it may get removed in a future
|
||||||
reworking.
|
reworking.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2478,7 +2484,7 @@ Checking for a used subpattern by name
|
||||||
<P>
|
<P>
|
||||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||||
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
||||||
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
||||||
however, that undelimited names consisting of the letter R followed by digits
|
however, that undelimited names consisting of the letter R followed by digits
|
||||||
are ambiguous (see the following section).
|
are ambiguous (see the following section).
|
||||||
</P>
|
</P>
|
||||||
|
@ -2508,7 +2514,7 @@ If a condition is the string (R), and there is no subpattern with the name R,
|
||||||
the condition is true if matching is currently in a recursion or subroutine
|
the condition is true if matching is currently in a recursion or subroutine
|
||||||
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
||||||
there is no subpattern with that name, the condition is true if the most recent
|
there is no subpattern with that name, the condition is true if the most recent
|
||||||
call is into a subpattern with the given number, which must exist somewhere in
|
call is into a subpattern with the given number, which must exist somewhere in
|
||||||
the overall pattern. This is a contrived example that is equivalent to a+b:
|
the overall pattern. This is a contrived example that is equivalent to a+b:
|
||||||
<pre>
|
<pre>
|
||||||
((?(R1)a+|(?1)b))
|
((?(R1)a+|(?1)b))
|
||||||
|
@ -2523,11 +2529,11 @@ If a name preceded by ampersand follows the letter R, for example:
|
||||||
<pre>
|
<pre>
|
||||||
(?(R&name)...)
|
(?(R&name)...)
|
||||||
</pre>
|
</pre>
|
||||||
the condition is true if the most recent recursion is into a subpattern of that
|
the condition is true if the most recent recursion is into a subpattern of that
|
||||||
name (which must exist within the pattern).
|
name (which must exist within the pattern).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This condition does not check the entire recursion stack. It tests only the
|
This condition does not check the entire recursion stack. It tests only the
|
||||||
current level. If the name used in a condition of this kind is a duplicate, the
|
current level. If the name used in a condition of this kind is a duplicate, the
|
||||||
test is applied to all subpatterns of the same name, and is true if any one of
|
test is applied to all subpatterns of the same name, and is true if any one of
|
||||||
them is the most recent recursion.
|
them is the most recent recursion.
|
||||||
|
@ -3008,21 +3014,21 @@ depending on whether or not a name is present.
|
||||||
By default, for compatibility with Perl, a name is any sequence of characters
|
By default, for compatibility with Perl, a name is any sequence of characters
|
||||||
that does not include a closing parenthesis. The name is not processed in
|
that does not include a closing parenthesis. The name is not processed in
|
||||||
any way, and it is not possible to include a closing parenthesis in the name.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
||||||
is no longer Perl-compatible.
|
is no longer Perl-compatible.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
||||||
and only an unescaped closing parenthesis terminates the name. However, the
|
and only an unescaped closing parenthesis terminates the name. However, the
|
||||||
only backslash items that are permitted are \Q, \E, and sequences such as
|
only backslash items that are permitted are \Q, \E, and sequences such as
|
||||||
\x{100} that define character code points. Character type escapes such as \d
|
\x{100} that define character code points. Character type escapes such as \d
|
||||||
are faulted.
|
are faulted.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A closing parenthesis can be included in a name either as \) or between \Q
|
A closing parenthesis can be included in a name either as \) or between \Q
|
||||||
and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
and \E. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
||||||
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
||||||
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
||||||
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -3446,7 +3452,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 October 2016
|
Last updated: 27 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -57,12 +57,13 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls.
|
||||||
Normally, these are never very deep, and the limit on the complexity of
|
Normally, these are never very deep, and the limit on the complexity of
|
||||||
<b>pcre2_dfa_match()</b> is controlled by the amount of workspace it is given.
|
<b>pcre2_dfa_match()</b> is controlled by the amount of workspace it is given.
|
||||||
However, it is possible to write patterns with runaway infinite recursions;
|
However, it is possible to write patterns with runaway infinite recursions;
|
||||||
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack. At
|
such patterns will cause <b>pcre2_dfa_match()</b> to run out of stack unless a
|
||||||
present, there is no protection against this.
|
limit is applied (see below).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The comments that follow do NOT apply to <b>pcre2_dfa_match()</b>; they are
|
The comments in the next three sections do not apply to
|
||||||
relevant only for <b>pcre2_match()</b> without the JIT optimization.
|
<b>pcre2_dfa_match()</b>; they are relevant only for <b>pcre2_match()</b> without
|
||||||
|
the JIT optimization.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Reducing <b>pcre2_match()</b>'s stack usage
|
Reducing <b>pcre2_match()</b>'s stack usage
|
||||||
|
@ -115,7 +116,7 @@ entitled
|
||||||
in the
|
in the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
documentation. Since the block sizes are always the same, it may be possible to
|
documentation. Since the block sizes are always the same, it may be possible to
|
||||||
implement customized a memory handler that is more efficient than the standard
|
implement a customized memory handler that is more efficient than the standard
|
||||||
function. The memory blocks obtained for this purpose are retained and re-used
|
function. The memory blocks obtained for this purpose are retained and re-used
|
||||||
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
if possible while <b>pcre2_match()</b> is running. They are all freed just
|
||||||
before it exits.
|
before it exits.
|
||||||
|
@ -151,6 +152,15 @@ pattern to match. This is done by calling <b>pcre2_match()</b> repeatedly with
|
||||||
different limits.
|
different limits.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
Limiting <b>pcre2_dfa_match()</b>'s stack usage
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
The recursion limit, as described above for <b>pcre2_match()</b>, also applies
|
||||||
|
to <b>pcre2_dfa_match()</b>, whose use of recursive function calls for
|
||||||
|
recursions in the pattern can lead to runaway stack usage. The non-recursive
|
||||||
|
match limit is not relevant for DFA matching, and is ignored.
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
Changing stack size in Unix-like systems
|
Changing stack size in Unix-like systems
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -198,9 +208,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 21 November 2014
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -451,9 +451,10 @@ appear.
|
||||||
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
(*UCP) set PCRE2_UCP (use Unicode properties for \d etc)
|
||||||
</pre>
|
</pre>
|
||||||
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
Note that LIMIT_MATCH and LIMIT_RECURSION can only reduce the value of the
|
||||||
limits set by the caller of pcre2_match(), not increase them. The application
|
limits set by the caller of <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>, not
|
||||||
can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or
|
increase them. The application can lock out the use of (*UTF) and (*UCP) by
|
||||||
PCRE2_NEVER_UCP options, respectively, at compile time.
|
setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at
|
||||||
|
compile time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
<br><a name="SEC17" href="#TOC1">NEWLINE CONVENTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -494,7 +495,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
\g{n} reference by number
|
\g{n} reference by number
|
||||||
\g+n relative reference by number (PCRE2 extension)
|
\g+n relative reference by number (PCRE2 extension)
|
||||||
\g-n relative reference by number
|
\g-n relative reference by number
|
||||||
\g{+n} relative reference by number (PCRE2 extension)
|
\g{+n} relative reference by number (PCRE2 extension)
|
||||||
\g{-n} relative reference by number
|
\g{-n} relative reference by number
|
||||||
\k<name> reference by name (Perl)
|
\k<name> reference by name (Perl)
|
||||||
\k'name' reference by name (Perl)
|
\k'name' reference by name (Perl)
|
||||||
|
@ -541,7 +542,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
(?(VERSION[>]=n.m) test PCRE2 version
|
(?(VERSION[>]=n.m) test PCRE2 version
|
||||||
(?(assert) assertion condition
|
(?(assert) assertion condition
|
||||||
</pre>
|
</pre>
|
||||||
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||||
conditions or recursion tests. Such a condition is interpreted as a reference
|
conditions or recursion tests. Such a condition is interpreted as a reference
|
||||||
condition if the relevant named group exists.
|
condition if the relevant named group exists.
|
||||||
</P>
|
</P>
|
||||||
|
@ -595,7 +596,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC27" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 28 September 2016
|
Last updated: 23 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -114,7 +114,7 @@ to the library. For subject lines, backslash escapes can be used. In addition,
|
||||||
when the <b>utf</b> modifier (see
|
when the <b>utf</b> modifier (see
|
||||||
<a href="#optionmodifiers">"Setting compilation options"</a>
|
<a href="#optionmodifiers">"Setting compilation options"</a>
|
||||||
below) is set, the pattern and any following subject lines are interpreted as
|
below) is set, the pattern and any following subject lines are interpreted as
|
||||||
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
|
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For non-UTF testing of wide characters, the <b>utf8_input</b> modifier can be
|
For non-UTF testing of wide characters, the <b>utf8_input</b> modifier can be
|
||||||
|
@ -153,8 +153,13 @@ the 32-bit library has been built, this is the default. If the 32-bit library
|
||||||
has not been built, this option causes an error.
|
has not been built, this option causes an error.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-ac</b>
|
||||||
|
Behave as if each pattern has the <b>auto_callout</b> modifier, that is, insert
|
||||||
|
automatic callouts into every pattern that is compiled.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-b</b>
|
<b>-b</b>
|
||||||
Behave as if each pattern has the <b>/fullbincode</b> modifier; the full
|
Behave as if each pattern has the <b>fullbincode</b> modifier; the full
|
||||||
internal binary form of the pattern is output after compilation.
|
internal binary form of the pattern is output after compilation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -220,7 +225,7 @@ Output a brief summary these options and then exit.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>-i</b>
|
<b>-i</b>
|
||||||
Behave as if each pattern has the <b>/info</b> modifier; information about the
|
Behave as if each pattern has the <b>info</b> modifier; information about the
|
||||||
compiled pattern is given after compilation.
|
compiled pattern is given after compilation.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -582,7 +587,7 @@ for a description of their effects.
|
||||||
As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
|
As well as turning on the PCRE2_UTF option, the <b>utf</b> modifier causes all
|
||||||
non-printing characters in output strings to be printed using the \x{hh...}
|
non-printing characters in output strings to be printed using the \x{hh...}
|
||||||
notation. Otherwise, those less than 0x100 are output in hex without the curly
|
notation. Otherwise, those less than 0x100 are output in hex without the curly
|
||||||
brackets. Setting <b>utf</b> in 16-bit or 32-bit mode also causes pattern and
|
brackets. Setting <b>utf</b> in 16-bit or 32-bit mode also causes pattern and
|
||||||
subject strings to be translated to UTF-16 or UTF-32, respectively, before
|
subject strings to be translated to UTF-16 or UTF-32, respectively, before
|
||||||
being passed to library functions.
|
being passed to library functions.
|
||||||
<a name="controlmodifiers"></a></P>
|
<a name="controlmodifiers"></a></P>
|
||||||
|
@ -615,8 +620,8 @@ about the pattern:
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
use_length do not zero-terminate the pattern
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
</pre>
|
</pre>
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
</P>
|
</P>
|
||||||
|
@ -705,7 +710,7 @@ Specifying the pattern's length
|
||||||
By default, patterns are passed to the compiling functions as zero-terminated
|
By default, patterns are passed to the compiling functions as zero-terminated
|
||||||
strings. When using the POSIX wrapper API, there is no other option. However,
|
strings. When using the POSIX wrapper API, there is no other option. However,
|
||||||
when using PCRE2's native API, patterns can be passed by length instead of
|
when using PCRE2's native API, patterns can be passed by length instead of
|
||||||
being zero-terminated. The <b>use_length</b> modifier causes this to happen.
|
being zero-terminated. The <b>use_length</b> modifier causes this to happen.
|
||||||
Using a length happens automatically (whether or not <b>use_length</b> is set)
|
Using a length happens automatically (whether or not <b>use_length</b> is set)
|
||||||
when <b>hex</b> is set, because patterns specified in hexadecimal may contain
|
when <b>hex</b> is set, because patterns specified in hexadecimal may contain
|
||||||
binary zeros.
|
binary zeros.
|
||||||
|
@ -733,17 +738,17 @@ the delimiter within a substring. The <b>hex</b> and <b>expand</b> modifiers are
|
||||||
mutually exclusive.
|
mutually exclusive.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The POSIX API cannot be used with patterns specified in hexadecimal because
|
The POSIX API cannot be used with patterns specified in hexadecimal because
|
||||||
they may contain binary zeros, which conflicts with <b>regcomp()</b>'s
|
they may contain binary zeros, which conflicts with <b>regcomp()</b>'s
|
||||||
requirement for a zero-terminated string. Such patterns are always passed to
|
requirement for a zero-terminated string. Such patterns are always passed to
|
||||||
<b>pcre2_compile()</b> as a string with a length, not as zero-terminated.
|
<b>pcre2_compile()</b> as a string with a length, not as zero-terminated.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Specifying wide characters in 16-bit and 32-bit modes
|
Specifying wide characters in 16-bit and 32-bit modes
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and
|
In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and
|
||||||
translated to UTF-16 or UTF-32 when the <b>utf</b> modifier is set. For testing
|
translated to UTF-16 or UTF-32 when the <b>utf</b> modifier is set. For testing
|
||||||
the 16-bit and 32-bit libraries in non-UTF mode, the <b>utf8_input</b> modifier
|
the 16-bit and 32-bit libraries in non-UTF mode, the <b>utf8_input</b> modifier
|
||||||
can be used. It is mutually exclusive with <b>utf</b>. Input lines are
|
can be used. It is mutually exclusive with <b>utf</b>. Input lines are
|
||||||
interpreted as UTF-8 as a means of specifying wide characters. More details are
|
interpreted as UTF-8 as a means of specifying wide characters. More details are
|
||||||
|
@ -806,7 +811,7 @@ modes are to be compiled:
|
||||||
2 compile JIT code for soft partial matching
|
2 compile JIT code for soft partial matching
|
||||||
4 compile JIT code for hard partial matching
|
4 compile JIT code for hard partial matching
|
||||||
</pre>
|
</pre>
|
||||||
The possible values for the <b>/jit</b> modifier are therefore:
|
The possible values for the <b>jit</b> modifier are therefore:
|
||||||
<pre>
|
<pre>
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
1 normal matching only
|
1 normal matching only
|
||||||
|
@ -852,14 +857,14 @@ code was actually used in the match.
|
||||||
Setting a locale
|
Setting a locale
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/locale</b> modifier must specify the name of a locale, for example:
|
The <b>locale</b> modifier must specify the name of a locale, for example:
|
||||||
<pre>
|
<pre>
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
</pre>
|
</pre>
|
||||||
The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
|
The given locale is set, <b>pcre2_maketables()</b> is called to build a set of
|
||||||
character tables for the locale, and this is then passed to
|
character tables for the locale, and this is then passed to
|
||||||
<b>pcre2_compile()</b> when compiling the regular expression. The same tables
|
<b>pcre2_compile()</b> when compiling the regular expression. The same tables
|
||||||
are used when matching the following subject lines. The <b>/locale</b> modifier
|
are used when matching the following subject lines. The <b>locale</b> modifier
|
||||||
applies only to the pattern on which it appears, but can be given in a
|
applies only to the pattern on which it appears, but can be given in a
|
||||||
<b>#pattern</b> command if a default is needed. Setting a locale and alternate
|
<b>#pattern</b> command if a default is needed. Setting a locale and alternate
|
||||||
character tables are mutually exclusive.
|
character tables are mutually exclusive.
|
||||||
|
@ -868,7 +873,7 @@ character tables are mutually exclusive.
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/memory</b> modifier causes the size in bytes of the memory used to hold
|
The <b>memory</b> modifier causes the size in bytes of the memory used to hold
|
||||||
the compiled pattern to be output. This does not include the size of the
|
the compiled pattern to be output. This does not include the size of the
|
||||||
<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
|
<b>pcre2_code</b> block; it is just the actual compiled data. If the pattern is
|
||||||
subsequently passed to the JIT compiler, the size of the JIT compiled code is
|
subsequently passed to the JIT compiler, the size of the JIT compiled code is
|
||||||
|
@ -937,7 +942,7 @@ an error.
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>/stackguard</b> modifier is used to test the use of
|
The <b>stackguard</b> modifier is used to test the use of
|
||||||
<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
|
<b>pcre2_set_compile_recursion_guard()</b>, a function that is provided to
|
||||||
enable stack availability to be checked during compilation (see the
|
enable stack availability to be checked during compilation (see the
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
@ -952,7 +957,7 @@ be aborted.
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The value specified for the <b>/tables</b> modifier must be one of the digits 0,
|
The value specified for the <b>tables</b> modifier must be one of the digits 0,
|
||||||
1, or 2. It causes a specific set of built-in character tables to be passed to
|
1, or 2. It causes a specific set of built-in character tables to be passed to
|
||||||
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
<b>pcre2_compile()</b>. This is used in the PCRE2 tests to check behaviour with
|
||||||
different character tables. The digit specifies the tables as follows:
|
different character tables. The digit specifies the tables as follows:
|
||||||
|
@ -1042,7 +1047,7 @@ The partial matching modifiers are provided with abbreviations because they
|
||||||
appear frequently in tests.
|
appear frequently in tests.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If the <b>/posix</b> modifier was present on the pattern, causing the POSIX
|
If the <b>posix</b> modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any effect
|
wrapper API to be used, the only option-setting modifiers that have any effect
|
||||||
are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
|
are <b>notbol</b>, <b>notempty</b>, and <b>noteol</b>, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to <b>regexec()</b>.
|
||||||
|
@ -1064,6 +1069,7 @@ pattern.
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
callout_data=<n> set a value to pass via callouts
|
callout_data=<n> set a value to pass via callouts
|
||||||
|
callout_error=<n>[:<m>] control callout error
|
||||||
callout_fail=<n>[:<m>] control callout failure
|
callout_fail=<n>[:<m>] control callout failure
|
||||||
callout_none do not supply a callout function
|
callout_none do not supply a callout function
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
|
@ -1159,15 +1165,22 @@ Testing callouts
|
||||||
<P>
|
<P>
|
||||||
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
A callout function is supplied when <b>pcre2test</b> calls the library matching
|
||||||
functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
|
functions, unless <b>callout_none</b> is specified. If <b>callout_capture</b> is
|
||||||
set, the current captured groups are output when a callout occurs.
|
set, the current captured groups are output when a callout occurs. The default
|
||||||
|
return from the callout function is zero, which allows matching to continue.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
The <b>callout_fail</b> modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that number is
|
only one number, 1 is returned instead of 0 (causing matching to backtrack)
|
||||||
reached. If two numbers are given, 1 is returned when callout <n> is reached
|
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
|
||||||
for the <m>th time. Note that callouts with string arguments are always given
|
is returned when callout <n> is reached and there have been at least <m>
|
||||||
the number zero. See "Callouts" below for a description of the output when a
|
callouts. The <b>callout_error</b> modifier is similar, except that
|
||||||
callout it taken.
|
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
|
||||||
|
aborted. If both these modifiers are set for the same callout number,
|
||||||
|
<b>callout_error</b> takes precedence.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that callouts with string arguments are always given the number zero. See
|
||||||
|
"Callouts" below for a description of the output when a callout is taken.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
The <b>callout_data</b> modifier can be given an unsigned or a negative number.
|
||||||
|
@ -1180,7 +1193,7 @@ Finding all matches in a string
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Searching for all possible matches within a subject can be requested by the
|
Searching for all possible matches within a subject can be requested by the
|
||||||
<b>global</b> or <b>/altglobal</b> modifier. After finding a match, the matching
|
<b>global</b> or <b>altglobal</b> modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The difference
|
function is called again to search the remainder of the subject. The difference
|
||||||
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
between <b>global</b> and <b>altglobal</b> is that the former uses the
|
||||||
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
<i>start_offset</i> argument to <b>pcre2_match()</b> or <b>pcre2_dfa_match()</b>
|
||||||
|
@ -1480,7 +1493,7 @@ unset substring is shown as "<unset>", as for the second data line.
|
||||||
If the strings contain any non-printing characters, they are output as \xhh
|
If the strings contain any non-printing characters, they are output as \xhh
|
||||||
escapes if the value is less than 256 and UTF mode is not set. Otherwise they
|
escapes if the value is less than 256 and UTF mode is not set. Otherwise they
|
||||||
are output as \x{hh...} escapes. See below for the definition of non-printing
|
are output as \x{hh...} escapes. See below for the definition of non-printing
|
||||||
characters. If the <b>/aftertext</b> modifier is set, the output for substring
|
characters. If the <b>aftertext</b> modifier is set, the output for substring
|
||||||
0 is followed by the rest of the subject string, identified by "0+" like
|
0 is followed by the rest of the subject string, identified by "0+" like
|
||||||
this:
|
this:
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -1673,7 +1686,7 @@ therefore shown as hex escapes.
|
||||||
<P>
|
<P>
|
||||||
When <b>pcre2test</b> is outputting text that is a matched part of a subject
|
When <b>pcre2test</b> is outputting text that is a matched part of a subject
|
||||||
string, it behaves in the same way, unless a different locale has been set for
|
string, it behaves in the same way, unless a different locale has been set for
|
||||||
the pattern (using the <b>/locale</b> modifier). In this case, the
|
the pattern (using the <b>locale</b> modifier). In this case, the
|
||||||
<b>isprint()</b> function is used to distinguish printing and non-printing
|
<b>isprint()</b> function is used to distinguish printing and non-printing
|
||||||
characters.
|
characters.
|
||||||
<a name="saverestore"></a></P>
|
<a name="saverestore"></a></P>
|
||||||
|
@ -1766,7 +1779,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 November 2016
|
Last updated: 28 December 2016
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2016 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
2023
doc/pcre2.txt
2023
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -15,7 +15,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
This function makes a copy of the memory used for a compiled pattern, excluding
|
This function makes a copy of the memory used for a compiled pattern, excluding
|
||||||
any memory used by the JIT compiler. Without a subsequent call to
|
any memory used by the JIT compiler. Without a subsequent call to
|
||||||
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching.
|
\fBpcre2_jit_compile()\fP, the copy can be used only for non-JIT matching.
|
||||||
Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also
|
Unlike \fBpcre2_code_copy()\fP, a separate copy of the character tables is also
|
||||||
made, with the new code pointing to it. This memory will be automatically freed
|
made, with the new code pointing to it. This memory will be automatically freed
|
||||||
when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if
|
when \fBpcre2_code_free()\fP is called. The yield of the function is NULL if
|
||||||
|
|
|
@ -511,7 +511,7 @@ If JIT is being used, but the JIT compilation is not being done immediately,
|
||||||
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
(perhaps waiting to see if the pattern is used often enough) similar logic is
|
||||||
required. JIT compilation updates a pointer within the compiled code block, so
|
required. JIT compilation updates a pointer within the compiled code block, so
|
||||||
a thread must gain unique write access to the pointer before calling
|
a thread must gain unique write access to the pointer before calling
|
||||||
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or
|
\fBpcre2_jit_compile()\fP. Alternatively, \fBpcre2_code_copy()\fP or
|
||||||
\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the
|
\fBpcre2_code_copy_with_tables()\fP can be used to obtain a private copy of the
|
||||||
compiled code.
|
compiled code.
|
||||||
.
|
.
|
||||||
|
@ -696,7 +696,7 @@ functions, \fIpcre2_match()\fP and \fIpcre2_dfa_match()\fP.
|
||||||
.sp
|
.sp
|
||||||
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||||
using up too much system stack when being compiled. The limit applies to
|
using up too much system stack when being compiled. The limit applies to
|
||||||
parentheses of all kinds, not just capturing parentheses.
|
parentheses of all kinds, not just capturing parentheses.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
@ -840,9 +840,9 @@ This limit is of use only if it is set smaller than \fImatch_limit\fP.
|
||||||
Limiting the recursion depth limits the amount of system stack that can be
|
Limiting the recursion depth limits the amount of system stack that can be
|
||||||
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
used, or, when PCRE2 has been compiled to use memory on the heap instead of the
|
||||||
stack, the amount of heap memory that can be used. This limit is not relevant,
|
stack, the amount of heap memory that can be used. This limit is not relevant,
|
||||||
and is ignored, when matching is done using JIT compiled code. However, it is
|
and is ignored, when matching is done using JIT compiled code. However, it is
|
||||||
supported by \fBpcre2_dfa_match()\fP, which uses recursive function calls less
|
supported by \fBpcre2_dfa_match()\fP, which uses recursive function calls less
|
||||||
frequently than \fBpcre2_match()\fP, but which can be caused to use a lot of
|
frequently than \fBpcre2_match()\fP, but which can be caused to use a lot of
|
||||||
stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.
|
stack by a recursive pattern such as /(.)(?1)/ matched to a very long string.
|
||||||
.P
|
.P
|
||||||
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
|
The default value for \fIrecursion_limit\fP can be set when PCRE2 is built; the
|
||||||
|
@ -1056,11 +1056,11 @@ below),
|
||||||
.\"
|
.\"
|
||||||
the JIT information cannot be copied (because it is position-dependent).
|
the JIT information cannot be copied (because it is position-dependent).
|
||||||
The new copy can initially be used only for non-JIT matching, though it can be
|
The new copy can initially be used only for non-JIT matching, though it can be
|
||||||
passed to \fBpcre2_jit_compile()\fP if required.
|
passed to \fBpcre2_jit_compile()\fP if required.
|
||||||
.P
|
.P
|
||||||
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
|
The \fBpcre2_code_copy()\fP function provides a way for individual threads in a
|
||||||
multithreaded application to acquire a private copy of shared compiled code.
|
multithreaded application to acquire a private copy of shared compiled code.
|
||||||
However, it does not make a copy of the character tables used by the compiled
|
However, it does not make a copy of the character tables used by the compiled
|
||||||
pattern; the new pattern code points to the same tables as the original code.
|
pattern; the new pattern code points to the same tables as the original code.
|
||||||
(See
|
(See
|
||||||
.\" HTML <a href="#localesupport">
|
.\" HTML <a href="#localesupport">
|
||||||
|
@ -1068,10 +1068,10 @@ pattern; the new pattern code points to the same tables as the original code.
|
||||||
"Locale Support"
|
"Locale Support"
|
||||||
.\"
|
.\"
|
||||||
below for details of these character tables.) In many applications the same
|
below for details of these character tables.) In many applications the same
|
||||||
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
tables are used throughout, so this behaviour is appropriate. Nevertheless,
|
||||||
there are occasions when a copy of a compiled pattern and the relevant tables
|
there are occasions when a copy of a compiled pattern and the relevant tables
|
||||||
are needed. The \fBpcre2_code_copy_with_tables()\fP function provides this facility.
|
are needed. The \fBpcre2_code_copy_with_tables()\fP function provides this facility.
|
||||||
Copies of both the code and the tables are made, with the new code pointing to
|
Copies of both the code and the tables are made, with the new code pointing to
|
||||||
the new tables. The memory for the new tables is automatically freed when
|
the new tables. The memory for the new tables is automatically freed when
|
||||||
\fBpcre2_code_free()\fP is called for the new copy of the compiled code.
|
\fBpcre2_code_free()\fP is called for the new copy of the compiled code.
|
||||||
.P
|
.P
|
||||||
|
@ -1114,7 +1114,7 @@ NULL immediately. Otherwise, the variables to which these point are set to an
|
||||||
error code and an offset (number of code units) within the pattern,
|
error code and an offset (number of code units) within the pattern,
|
||||||
respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
|
respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
|
||||||
error has occurred. The values are not defined when compilation is successful
|
error has occurred. The values are not defined when compilation is successful
|
||||||
and \fBpcre2_compile()\fP returns a non-NULL value.
|
and \fBpcre2_compile()\fP returns a non-NULL value.
|
||||||
.P
|
.P
|
||||||
The value returned in \fIerroroffset\fP is an indication of where in the
|
The value returned in \fIerroroffset\fP is an indication of where in the
|
||||||
pattern the error occurred. It is not necessarily the furthest point in the
|
pattern the error occurred. It is not necessarily the furthest point in the
|
||||||
|
|
|
@ -388,15 +388,15 @@ scanning, in order to be able to output "before" and "after" lines when it
|
||||||
finds a match. The starting size of the buffer is controlled by a parameter
|
finds a match. The starting size of the buffer is controlled by a parameter
|
||||||
whose default value is 20K. The buffer itself is three times this size, but
|
whose default value is 20K. The buffer itself is three times this size, but
|
||||||
because of the way it is used for holding "before" lines, the longest line that
|
because of the way it is used for holding "before" lines, the longest line that
|
||||||
is guaranteed to be processable is the parameter size. If a longer line is
|
is guaranteed to be processable is the parameter size. If a longer line is
|
||||||
encountered, \fBpcre2grep\fP automatically expands the buffer, up to a
|
encountered, \fBpcre2grep\fP automatically expands the buffer, up to a
|
||||||
specified maximum size, whose default is 1M or the starting size, whichever is
|
specified maximum size, whose default is 1M or the starting size, whichever is
|
||||||
the larger. You can change the default parameter values by adding, for example,
|
the larger. You can change the default parameter values by adding, for example,
|
||||||
.sp
|
.sp
|
||||||
--with-pcre2grep-bufsize=51200
|
--with-pcre2grep-bufsize=51200
|
||||||
--with-pcre2grep-max-bufsize=2097152
|
--with-pcre2grep-max-bufsize=2097152
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
|
to the \fBconfigure\fP command. The caller of \fBpcre2grep\fP can override
|
||||||
these values by using --buffer-size and --max-buffer-size on the command line.
|
these values by using --buffer-size and --max-buffer-size on the command line.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -522,7 +522,7 @@ There is a special option for use by people who want to run fuzzing tests on
|
||||||
PCRE2:
|
PCRE2:
|
||||||
.sp
|
.sp
|
||||||
--enable-fuzz-support
|
--enable-fuzz-support
|
||||||
.sp
|
.sp
|
||||||
At present this applies only to the 8-bit library. If set, it causes an extra
|
At present this applies only to the 8-bit library. If set, it causes an extra
|
||||||
library called libpcre2-fuzzsupport.a to be built, but not installed. This
|
library called libpcre2-fuzzsupport.a to be built, but not installed. This
|
||||||
contains a single function called LLVMFuzzerTestOneInput() whose arguments are
|
contains a single function called LLVMFuzzerTestOneInput() whose arguments are
|
||||||
|
|
|
@ -47,7 +47,7 @@ For example, if PCRE2_AUTO_CALLOUT is used with the pattern
|
||||||
.sp
|
.sp
|
||||||
it is processed as if it were
|
it is processed as if it were
|
||||||
.sp
|
.sp
|
||||||
(?C255)A(?C3)B(?C255)
|
(?C255)A(?C3)B(?C255)
|
||||||
.sp
|
.sp
|
||||||
Here is a more complicated example:
|
Here is a more complicated example:
|
||||||
.sp
|
.sp
|
||||||
|
|
|
@ -118,8 +118,8 @@ an error is given at compile time.
|
||||||
.P
|
.P
|
||||||
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
14. Perl used to recognize comments in some places that PCRE2 does not, for
|
||||||
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
example, between the ( and ? at the start of a subpattern. If the /x modifier
|
||||||
is set, Perl allowed white space between ( and ? though the latest Perls give
|
is set, Perl allowed white space between ( and ? though the latest Perls give
|
||||||
an error (for a while it was just deprecated). There may still be some cases
|
an error (for a while it was just deprecated). There may still be some cases
|
||||||
where Perl behaves differently.
|
where Perl behaves differently.
|
||||||
.P
|
.P
|
||||||
15. Perl, when in warning mode, gives warnings for character classes such as
|
15. Perl, when in warning mode, gives warnings for character classes such as
|
||||||
|
@ -140,7 +140,7 @@ list is with respect to Perl 5.10:
|
||||||
.sp
|
.sp
|
||||||
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
(a) Although lookbehind assertions in PCRE2 must match fixed length strings,
|
||||||
each alternative branch of a lookbehind assertion can match a different length
|
each alternative branch of a lookbehind assertion can match a different length
|
||||||
of string. Perl requires them all to have the same length.
|
of string. Perl requires them all to have the same length.
|
||||||
.sp
|
.sp
|
||||||
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
(b) From PCRE2 10.23, back references to groups of fixed length are supported
|
||||||
in lookbehinds, provided that there is no possibility of referencing a
|
in lookbehinds, provided that there is no possibility of referencing a
|
||||||
|
|
|
@ -62,7 +62,7 @@ built, with the default defaults being 20K and 1M respectively. An error occurs
|
||||||
if a line is too long and the buffer can no longer be expanded.
|
if a line is too long and the buffer can no longer be expanded.
|
||||||
.P
|
.P
|
||||||
The block of memory that is actually used is three times the "buffer size", to
|
The block of memory that is actually used is three times the "buffer size", to
|
||||||
allow for buffering "before" and "after" lines. If the buffer size is too
|
allow for buffering "before" and "after" lines. If the buffer size is too
|
||||||
small, fewer than requested "before" and "after" lines may be output.
|
small, fewer than requested "before" and "after" lines may be output.
|
||||||
.P
|
.P
|
||||||
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
Patterns can be no longer than 8K or BUFSIZ bytes, whichever is the greater.
|
||||||
|
@ -146,8 +146,8 @@ Treat binary files as text. This is equivalent to
|
||||||
\fB--binary-files\fP=\fItext\fP.
|
\fB--binary-files\fP=\fItext\fP.
|
||||||
.TP
|
.TP
|
||||||
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
|
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
|
||||||
Output up to \fInumber\fP lines of context before each matching line. Fewer
|
Output up to \fInumber\fP lines of context before each matching line. Fewer
|
||||||
lines are output if the previous match or the start of the file is within
|
lines are output if the previous match or the start of the file is within
|
||||||
\fInumber\fP lines, or if the processing buffer size has been set too small. If
|
\fInumber\fP lines, or if the processing buffer size has been set too small. If
|
||||||
file names and/or line numbers are being output, a hyphen separator is used
|
file names and/or line numbers are being output, a hyphen separator is used
|
||||||
instead of a colon for the context lines. A line containing "--" is output
|
instead of a colon for the context lines. A line containing "--" is output
|
||||||
|
@ -168,8 +168,8 @@ be of interest and are skipped without causing any output or affecting the
|
||||||
return code.
|
return code.
|
||||||
.TP
|
.TP
|
||||||
\fB--buffer-size=\fP\fInumber\fP
|
\fB--buffer-size=\fP\fInumber\fP
|
||||||
Set the parameter that controls how much memory is obtained at the start of
|
Set the parameter that controls how much memory is obtained at the start of
|
||||||
processing for buffering files that are being scanned. See also
|
processing for buffering files that are being scanned. See also
|
||||||
\fB--max-buffer-size\fP below.
|
\fB--max-buffer-size\fP below.
|
||||||
.TP
|
.TP
|
||||||
\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
|
\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
|
||||||
|
@ -185,7 +185,7 @@ exactly the same as the number of lines that would have been output, but if the
|
||||||
suppressed lines than the count (that is, the number of matches).
|
suppressed lines than the count (that is, the number of matches).
|
||||||
.sp
|
.sp
|
||||||
If no lines are selected, the number zero is output. If several files are
|
If no lines are selected, the number zero is output. If several files are
|
||||||
being scanned, a count is output for each of them and the \fB-t\fP option can
|
being scanned, a count is output for each of them and the \fB-t\fP option can
|
||||||
be used to cause a total to be output at the end. However, if the
|
be used to cause a total to be output at the end. However, if the
|
||||||
\fB--files-with-matches\fP option is also used, only those files whose counts
|
\fB--files-with-matches\fP option is also used, only those files whose counts
|
||||||
are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
|
are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
|
||||||
|
@ -209,14 +209,14 @@ The colour that is used can be specified by setting one of the environment
|
||||||
variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
|
variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
|
||||||
PCREGREP_COLOR, which are checked in that order. If none of these are set,
|
PCREGREP_COLOR, which are checked in that order. If none of these are set,
|
||||||
\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value
|
\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value
|
||||||
of the variable should be a string of two numbers, separated by a semicolon,
|
of the variable should be a string of two numbers, separated by a semicolon,
|
||||||
except in the case of GREP_COLORS, which must start with "ms=" or "mt="
|
except in the case of GREP_COLORS, which must start with "ms=" or "mt="
|
||||||
followed by two semicolon-separated colours, terminated by the end of the
|
followed by two semicolon-separated colours, terminated by the end of the
|
||||||
string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
|
string or by a colon. If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||||
ignored, and GREP_COLOR is checked.
|
ignored, and GREP_COLOR is checked.
|
||||||
.sp
|
.sp
|
||||||
If the string obtained from one of the above variables contains any characters
|
If the string obtained from one of the above variables contains any characters
|
||||||
other than semicolon or digits, the setting is ignored and the default colour
|
other than semicolon or digits, the setting is ignored and the default colour
|
||||||
is used. The string is copied directly into the control string for setting
|
is used. The string is copied directly into the control string for setting
|
||||||
colour on a terminal, so it is your responsibility to ensure that the values
|
colour on a terminal, so it is your responsibility to ensure that the values
|
||||||
make sense. If no relevant environment variable is set, the default is "1;31",
|
make sense. If no relevant environment variable is set, the default is "1;31",
|
||||||
|
@ -311,7 +311,7 @@ specified on the command line using \fB-e\fP may also be present; they are
|
||||||
tested before the file's patterns. However, no other pattern is taken from the
|
tested before the file's patterns. However, no other pattern is taken from the
|
||||||
command line; all arguments are treated as the names of paths to be searched.
|
command line; all arguments are treated as the names of paths to be searched.
|
||||||
.TP
|
.TP
|
||||||
\fB--file-list\fP=\fIfilename\fP
|
\fB--file-list\fP=\fIfilename\fP
|
||||||
Read a list of files and/or directories that are to be scanned from the given
|
Read a list of files and/or directories that are to be scanned from the given
|
||||||
file, one per line. Trailing white space is removed from each line, and blank
|
file, one per line. Trailing white space is removed from each line, and blank
|
||||||
lines are ignored. These paths are processed before any that are listed on the
|
lines are ignored. These paths are processed before any that are listed on the
|
||||||
|
@ -456,8 +456,8 @@ There are no short forms for these options. The default settings are specified
|
||||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||||
.TP
|
.TP
|
||||||
\fB--max-buffer-size=\fP\fInumber\fP
|
\fB--max-buffer-size=\fP\fInumber\fP
|
||||||
This limits the expansion of the processing buffer, whose initial size can be
|
This limits the expansion of the processing buffer, whose initial size can be
|
||||||
set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
|
set by \fB--buffer-size\fP. The maximum buffer size is silently forced to be no
|
||||||
smaller than the starting buffer size.
|
smaller than the starting buffer size.
|
||||||
.TP
|
.TP
|
||||||
\fB-M\fP, \fB--multiline\fP
|
\fB-M\fP, \fB--multiline\fP
|
||||||
|
@ -547,7 +547,7 @@ for the non-argument case above also apply to this case. If the specified
|
||||||
capturing parentheses do not exist in the pattern, or were not set in the
|
capturing parentheses do not exist in the pattern, or were not set in the
|
||||||
match, nothing is output unless the file name or line number are being output.
|
match, nothing is output unless the file name or line number are being output.
|
||||||
.sp
|
.sp
|
||||||
If this option is given multiple times, multiple substrings are output for each
|
If this option is given multiple times, multiple substrings are output for each
|
||||||
match, in the order the options are given, and all on one line. For example,
|
match, in the order the options are given, and all on one line. For example,
|
||||||
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
-o3 -o1 -o3 causes the substrings matched by capturing parentheses 3 and 1 and
|
||||||
then 3 again to be output. By default, there is no separator (but see the next
|
then 3 again to be output. By default, there is no separator (but see the next
|
||||||
|
@ -610,7 +610,7 @@ specified by any of the \fB--include\fP or \fB--exclude\fP options.
|
||||||
.TP
|
.TP
|
||||||
\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
|
\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
|
||||||
Force the patterns to be anchored (each must start matching at the beginning of
|
Force the patterns to be anchored (each must start matching at the beginning of
|
||||||
a line) and in addition, require them to match entire lines. In multiline mode
|
a line) and in addition, require them to match entire lines. In multiline mode
|
||||||
the match may be more than one line. This is equivalent to having \eA and \eZ
|
the match may be more than one line. This is equivalent to having \eA and \eZ
|
||||||
characters at the start and end of each alternative top-level branch in every
|
characters at the start and end of each alternative top-level branch in every
|
||||||
pattern. This option applies only to the patterns that are matched against the
|
pattern. This option applies only to the patterns that are matched against the
|
||||||
|
|
|
@ -219,15 +219,25 @@ OPTIONS
|
||||||
possible matches in a line, not just one, in order to colour
|
possible matches in a line, not just one, in order to colour
|
||||||
them all.
|
them all.
|
||||||
|
|
||||||
The colour that is used can be specified by setting the envi-
|
The colour that is used can be specified by setting one of
|
||||||
ronment variable PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If nei-
|
the environment variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR,
|
||||||
ther of these are set, pcre2grep looks for GREP_COLOUR or
|
PCREGREP_COLOUR, or PCREGREP_COLOR, which are checked in that
|
||||||
GREP_COLOR. The value of the variable should be a string of
|
order. If none of these are set, pcre2grep looks for
|
||||||
two numbers, separated by a semicolon. They are copied
|
GREP_COLORS or GREP_COLOR (in that order). The value of the
|
||||||
directly into the control string for setting colour on a ter-
|
variable should be a string of two numbers, separated by a
|
||||||
minal, so it is your responsibility to ensure that they make
|
semicolon, except in the case of GREP_COLORS, which must
|
||||||
sense. If neither of the environment variables is set, the
|
start with "ms=" or "mt=" followed by two semicolon-separated
|
||||||
default is "1;31", which gives red.
|
colours, terminated by the end of the string or by a colon.
|
||||||
|
If GREP_COLORS does not start with "ms=" or "mt=" it is
|
||||||
|
ignored, and GREP_COLOR is checked.
|
||||||
|
|
||||||
|
If the string obtained from one of the above variables con-
|
||||||
|
tains any characters other than semicolon or digits, the set-
|
||||||
|
ting is ignored and the default colour is used. The string is
|
||||||
|
copied directly into the control string for setting colour on
|
||||||
|
a terminal, so it is your responsibility to ensure that the
|
||||||
|
values make sense. If no relevant environment variable is
|
||||||
|
set, the default is "1;31", which gives red.
|
||||||
|
|
||||||
-D action, --devices=action
|
-D action, --devices=action
|
||||||
If an input path is not a regular file or a directory,
|
If an input path is not a regular file or a directory,
|
||||||
|
@ -763,12 +773,12 @@ OPTIONS WITH DATA
|
||||||
|
|
||||||
CALLING EXTERNAL SCRIPTS
|
CALLING EXTERNAL SCRIPTS
|
||||||
|
|
||||||
On non-Windows systems, pcre2grep has, by default, support for calling
|
pcre2grep has, by default, support for calling external programs or
|
||||||
external programs or scripts during matching by making use of PCRE2's
|
scripts during matching by making use of PCRE2's callout facility. How-
|
||||||
callout facility. However, this support can be disabled when pcre2grep
|
ever, this support can be disabled when pcre2grep is built. You can
|
||||||
is built. You can find out whether your binary has support for call-
|
find out whether your binary has support for callouts by running it
|
||||||
outs by running it with the --help option. If the support is not
|
with the --help option. If the support is not enabled, all callouts in
|
||||||
enabled, all callouts in patterns are ignored by pcre2grep.
|
patterns are ignored by pcre2grep.
|
||||||
|
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||||
|
@ -852,5 +862,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 31 October 2016
|
Last updated: 31 December 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
|
|
|
@ -47,8 +47,8 @@ There is no limit to the number of parenthesized subpatterns, but there can be
|
||||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||||
order to limit the amount of system stack used at compile time. The default
|
order to limit the amount of system stack used at compile time. The default
|
||||||
limit can be specified when PCRE2 is built; the default default is 250. An
|
limit can be specified when PCRE2 is built; the default default is 250. An
|
||||||
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
application can change this limit by calling pcre2_set_parens_nest_limit() to
|
||||||
set the limit in a compile context.
|
set the limit in a compile context.
|
||||||
.P
|
.P
|
||||||
The maximum length of name for a named subpattern is 32 code units, and the
|
The maximum length of name for a named subpattern is 32 code units, and the
|
||||||
|
@ -58,7 +58,7 @@ The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
is 255 code units for the 8-bit library and 65535 code units for the 16-bit and
|
||||||
32-bit libraries.
|
32-bit libraries.
|
||||||
.P
|
.P
|
||||||
The maximum length of a string argument to a callout is the largest number a
|
The maximum length of a string argument to a callout is the largest number a
|
||||||
32-bit unsigned integer can hold.
|
32-bit unsigned integer can hold.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -159,9 +159,9 @@ for it to have any effect. In other words, the pattern writer can lower the
|
||||||
limits set by the programmer, but not raise them. If there is more than one
|
limits set by the programmer, but not raise them. If there is more than one
|
||||||
setting of one of these limits, the lower value is used.
|
setting of one of these limits, the lower value is used.
|
||||||
.P
|
.P
|
||||||
The match limit is used (but in a different way) when JIT is being used, but it
|
The match limit is used (but in a different way) when JIT is being used, but it
|
||||||
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
|
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
|
||||||
However, the recursion limit is relevant for DFA matching, which does use some
|
However, the recursion limit is relevant for DFA matching, which does use some
|
||||||
function recursion, in particular, for recursions within the pattern.
|
function recursion, in particular, for recursions within the pattern.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -2096,7 +2096,7 @@ no such problem when named parentheses are used. A back reference to any
|
||||||
subpattern is possible using named parentheses (see below).
|
subpattern is possible using named parentheses (see below).
|
||||||
.P
|
.P
|
||||||
Another way of avoiding the ambiguity inherent in the use of digits following a
|
Another way of avoiding the ambiguity inherent in the use of digits following a
|
||||||
backslash is to use the \eg escape sequence. This escape must be followed by a
|
backslash is to use the \eg escape sequence. This escape must be followed by a
|
||||||
signed or unsigned number, optionally enclosed in braces. These examples are
|
signed or unsigned number, optionally enclosed in braces. These examples are
|
||||||
all identical:
|
all identical:
|
||||||
.sp
|
.sp
|
||||||
|
@ -2116,8 +2116,8 @@ Similarly, \eg{-2} would be equivalent to \e1. The use of relative references
|
||||||
can be helpful in long patterns, and also in patterns that are created by
|
can be helpful in long patterns, and also in patterns that are created by
|
||||||
joining together fragments that contain references within themselves.
|
joining together fragments that contain references within themselves.
|
||||||
.P
|
.P
|
||||||
The sequence \eg{+1} is a reference to the next capturing subpattern. This kind
|
The sequence \eg{+1} is a reference to the next capturing subpattern. This kind
|
||||||
of forward reference can be useful in patterns that repeat. Perl does not
|
of forward reference can be useful in patterns that repeat. Perl does not
|
||||||
support the use of + in this way.
|
support the use of + in this way.
|
||||||
.P
|
.P
|
||||||
A back reference matches whatever actually matched the capturing subpattern in
|
A back reference matches whatever actually matched the capturing subpattern in
|
||||||
|
@ -2227,11 +2227,11 @@ numbering the capturing subpatterns in the whole pattern. However, substring
|
||||||
capturing is carried out only for positive assertions. (Perl sometimes, but not
|
capturing is carried out only for positive assertions. (Perl sometimes, but not
|
||||||
always, does do capturing in negative assertions.)
|
always, does do capturing in negative assertions.)
|
||||||
.P
|
.P
|
||||||
WARNING: If a positive assertion containing one or more capturing subpatterns
|
WARNING: If a positive assertion containing one or more capturing subpatterns
|
||||||
succeeds, but failure to match later in the pattern causes backtracking over
|
succeeds, but failure to match later in the pattern causes backtracking over
|
||||||
this assertion, the captures within the assertion are reset only if no higher
|
this assertion, the captures within the assertion are reset only if no higher
|
||||||
numbered captures are already set. This is, unfortunately, a fundamental
|
numbered captures are already set. This is, unfortunately, a fundamental
|
||||||
limitation of the current implementation; it may get removed in a future
|
limitation of the current implementation; it may get removed in a future
|
||||||
reworking.
|
reworking.
|
||||||
.P
|
.P
|
||||||
For compatibility with Perl, most assertion subpatterns may be repeated; though
|
For compatibility with Perl, most assertion subpatterns may be repeated; though
|
||||||
|
@ -2497,7 +2497,7 @@ This makes the fragment independent of the parentheses in the larger pattern.
|
||||||
.sp
|
.sp
|
||||||
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
|
||||||
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
subpattern by name. For compatibility with earlier versions of PCRE1, which had
|
||||||
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
this facility before Perl, the syntax (?(name)...) is also recognized. Note,
|
||||||
however, that undelimited names consisting of the letter R followed by digits
|
however, that undelimited names consisting of the letter R followed by digits
|
||||||
are ambiguous (see the following section).
|
are ambiguous (see the following section).
|
||||||
.P
|
.P
|
||||||
|
@ -2531,11 +2531,11 @@ If a condition is the string (R), and there is no subpattern with the name R,
|
||||||
the condition is true if matching is currently in a recursion or subroutine
|
the condition is true if matching is currently in a recursion or subroutine
|
||||||
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
call to the whole pattern or any subpattern. If digits follow the letter R, and
|
||||||
there is no subpattern with that name, the condition is true if the most recent
|
there is no subpattern with that name, the condition is true if the most recent
|
||||||
call is into a subpattern with the given number, which must exist somewhere in
|
call is into a subpattern with the given number, which must exist somewhere in
|
||||||
the overall pattern. This is a contrived example that is equivalent to a+b:
|
the overall pattern. This is a contrived example that is equivalent to a+b:
|
||||||
.sp
|
.sp
|
||||||
((?(R1)a+|(?1)b))
|
((?(R1)a+|(?1)b))
|
||||||
.sp
|
.sp
|
||||||
However, in both cases, if there is a subpattern with a matching name, the
|
However, in both cases, if there is a subpattern with a matching name, the
|
||||||
condition tests for its being set, as described in the section above, instead
|
condition tests for its being set, as described in the section above, instead
|
||||||
of testing for recursion. For example, creating a group with the name R1 by
|
of testing for recursion. For example, creating a group with the name R1 by
|
||||||
|
@ -2545,10 +2545,10 @@ If a name preceded by ampersand follows the letter R, for example:
|
||||||
.sp
|
.sp
|
||||||
(?(R&name)...)
|
(?(R&name)...)
|
||||||
.sp
|
.sp
|
||||||
the condition is true if the most recent recursion is into a subpattern of that
|
the condition is true if the most recent recursion is into a subpattern of that
|
||||||
name (which must exist within the pattern).
|
name (which must exist within the pattern).
|
||||||
.P
|
.P
|
||||||
This condition does not check the entire recursion stack. It tests only the
|
This condition does not check the entire recursion stack. It tests only the
|
||||||
current level. If the name used in a condition of this kind is a duplicate, the
|
current level. If the name used in a condition of this kind is a duplicate, the
|
||||||
test is applied to all subpatterns of the same name, and is true if any one of
|
test is applied to all subpatterns of the same name, and is true if any one of
|
||||||
them is the most recent recursion.
|
them is the most recent recursion.
|
||||||
|
@ -3039,19 +3039,19 @@ depending on whether or not a name is present.
|
||||||
By default, for compatibility with Perl, a name is any sequence of characters
|
By default, for compatibility with Perl, a name is any sequence of characters
|
||||||
that does not include a closing parenthesis. The name is not processed in
|
that does not include a closing parenthesis. The name is not processed in
|
||||||
any way, and it is not possible to include a closing parenthesis in the name.
|
any way, and it is not possible to include a closing parenthesis in the name.
|
||||||
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
This can be changed by setting the PCRE2_ALT_VERBNAMES option, but the result
|
||||||
is no longer Perl-compatible.
|
is no longer Perl-compatible.
|
||||||
.P
|
.P
|
||||||
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
When PCRE2_ALT_VERBNAMES is set, backslash processing is applied to verb names
|
||||||
and only an unescaped closing parenthesis terminates the name. However, the
|
and only an unescaped closing parenthesis terminates the name. However, the
|
||||||
only backslash items that are permitted are \eQ, \eE, and sequences such as
|
only backslash items that are permitted are \eQ, \eE, and sequences such as
|
||||||
\ex{100} that define character code points. Character type escapes such as \ed
|
\ex{100} that define character code points. Character type escapes such as \ed
|
||||||
are faulted.
|
are faulted.
|
||||||
.P
|
.P
|
||||||
A closing parenthesis can be included in a name either as \e) or between \eQ
|
A closing parenthesis can be included in a name either as \e) or between \eQ
|
||||||
and \eE. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
and \eE. In addition to backslash processing, if the PCRE2_EXTENDED option is
|
||||||
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
also set, unescaped whitespace in verb names is skipped, and #-comments are
|
||||||
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
recognized, exactly as in the rest of the pattern. PCRE2_EXTENDED does not
|
||||||
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
affect verb names unless PCRE2_ALT_VERBNAMES is also set.
|
||||||
.P
|
.P
|
||||||
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
The maximum length of a name is 255 in the 8-bit library and 65535 in the
|
||||||
|
|
|
@ -43,8 +43,8 @@ assertion and "once-only" subpatterns, which are handled like subroutine calls.
|
||||||
Normally, these are never very deep, and the limit on the complexity of
|
Normally, these are never very deep, and the limit on the complexity of
|
||||||
\fBpcre2_dfa_match()\fP is controlled by the amount of workspace it is given.
|
\fBpcre2_dfa_match()\fP is controlled by the amount of workspace it is given.
|
||||||
However, it is possible to write patterns with runaway infinite recursions;
|
However, it is possible to write patterns with runaway infinite recursions;
|
||||||
such patterns will cause \fBpcre2_dfa_match()\fP to run out of stack unless a
|
such patterns will cause \fBpcre2_dfa_match()\fP to run out of stack unless a
|
||||||
limit is applied (see below).
|
limit is applied (see below).
|
||||||
.P
|
.P
|
||||||
The comments in the next three sections do not apply to
|
The comments in the next three sections do not apply to
|
||||||
\fBpcre2_dfa_match()\fP; they are relevant only for \fBpcre2_match()\fP without
|
\fBpcre2_dfa_match()\fP; they are relevant only for \fBpcre2_match()\fP without
|
||||||
|
@ -151,9 +151,9 @@ different limits.
|
||||||
.SS "Limiting \fBpcre2_dfa_match()\fP's stack usage"
|
.SS "Limiting \fBpcre2_dfa_match()\fP's stack usage"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The recursion limit, as described above for \fBpcre2_match()\fP, also applies
|
The recursion limit, as described above for \fBpcre2_match()\fP, also applies
|
||||||
to \fBpcre2_dfa_match()\fP, whose use of recursive function calls for
|
to \fBpcre2_dfa_match()\fP, whose use of recursive function calls for
|
||||||
recursions in the pattern can lead to runaway stack usage. The non-recursive
|
recursions in the pattern can lead to runaway stack usage. The non-recursive
|
||||||
match limit is not relevant for DFA matching, and is ignored.
|
match limit is not relevant for DFA matching, and is ignored.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
|
|
@ -476,7 +476,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
\eg{n} reference by number
|
\eg{n} reference by number
|
||||||
\eg+n relative reference by number (PCRE2 extension)
|
\eg+n relative reference by number (PCRE2 extension)
|
||||||
\eg-n relative reference by number
|
\eg-n relative reference by number
|
||||||
\eg{+n} relative reference by number (PCRE2 extension)
|
\eg{+n} relative reference by number (PCRE2 extension)
|
||||||
\eg{-n} relative reference by number
|
\eg{-n} relative reference by number
|
||||||
\ek<name> reference by name (Perl)
|
\ek<name> reference by name (Perl)
|
||||||
\ek'name' reference by name (Perl)
|
\ek'name' reference by name (Perl)
|
||||||
|
@ -523,7 +523,7 @@ Each top-level branch of a look behind must be of a fixed length.
|
||||||
(?(VERSION[>]=n.m) test PCRE2 version
|
(?(VERSION[>]=n.m) test PCRE2 version
|
||||||
(?(assert) assertion condition
|
(?(assert) assertion condition
|
||||||
.sp
|
.sp
|
||||||
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
Note the ambiguity of (?(R) and (?(Rn) which might be named reference
|
||||||
conditions or recursion tests. Such a condition is interpreted as a reference
|
conditions or recursion tests. Such a condition is interpreted as a reference
|
||||||
condition if the relevant named group exists.
|
condition if the relevant named group exists.
|
||||||
.
|
.
|
||||||
|
|
|
@ -88,7 +88,7 @@ when the \fButf\fP modifier (see
|
||||||
"Setting compilation options"
|
"Setting compilation options"
|
||||||
.\"
|
.\"
|
||||||
below) is set, the pattern and any following subject lines are interpreted as
|
below) is set, the pattern and any following subject lines are interpreted as
|
||||||
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
|
UTF-8 strings and translated to UTF-16 or UTF-32 as appropriate.
|
||||||
.P
|
.P
|
||||||
For non-UTF testing of wide characters, the \fButf8_input\fP modifier can be
|
For non-UTF testing of wide characters, the \fButf8_input\fP modifier can be
|
||||||
used. This is mutually exclusive with \fButf\fP, and is allowed only in 16-bit
|
used. This is mutually exclusive with \fButf\fP, and is allowed only in 16-bit
|
||||||
|
@ -125,7 +125,7 @@ the 32-bit library has been built, this is the default. If the 32-bit library
|
||||||
has not been built, this option causes an error.
|
has not been built, this option causes an error.
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB-ac\fP
|
\fB-ac\fP
|
||||||
Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert
|
Behave as if each pattern has the \fBauto_callout\fP modifier, that is, insert
|
||||||
automatic callouts into every pattern that is compiled.
|
automatic callouts into every pattern that is compiled.
|
||||||
.TP 10
|
.TP 10
|
||||||
\fB-b\fP
|
\fB-b\fP
|
||||||
|
@ -550,7 +550,7 @@ for a description of their effects.
|
||||||
As well as turning on the PCRE2_UTF option, the \fButf\fP modifier causes all
|
As well as turning on the PCRE2_UTF option, the \fButf\fP modifier causes all
|
||||||
non-printing characters in output strings to be printed using the \ex{hh...}
|
non-printing characters in output strings to be printed using the \ex{hh...}
|
||||||
notation. Otherwise, those less than 0x100 are output in hex without the curly
|
notation. Otherwise, those less than 0x100 are output in hex without the curly
|
||||||
brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and
|
brackets. Setting \fButf\fP in 16-bit or 32-bit mode also causes pattern and
|
||||||
subject strings to be translated to UTF-16 or UTF-32, respectively, before
|
subject strings to be translated to UTF-16 or UTF-32, respectively, before
|
||||||
being passed to library functions.
|
being passed to library functions.
|
||||||
.
|
.
|
||||||
|
@ -584,8 +584,8 @@ about the pattern:
|
||||||
pushcopy push a copy onto the stack
|
pushcopy push a copy onto the stack
|
||||||
stackguard=<number> test the stackguard feature
|
stackguard=<number> test the stackguard feature
|
||||||
tables=[0|1|2] select internal tables
|
tables=[0|1|2] select internal tables
|
||||||
use_length do not zero-terminate the pattern
|
use_length do not zero-terminate the pattern
|
||||||
utf8_input treat input as UTF-8
|
utf8_input treat input as UTF-8
|
||||||
.sp
|
.sp
|
||||||
The effects of these modifiers are described in the following sections.
|
The effects of these modifiers are described in the following sections.
|
||||||
.
|
.
|
||||||
|
@ -669,7 +669,7 @@ default values).
|
||||||
By default, patterns are passed to the compiling functions as zero-terminated
|
By default, patterns are passed to the compiling functions as zero-terminated
|
||||||
strings. When using the POSIX wrapper API, there is no other option. However,
|
strings. When using the POSIX wrapper API, there is no other option. However,
|
||||||
when using PCRE2's native API, patterns can be passed by length instead of
|
when using PCRE2's native API, patterns can be passed by length instead of
|
||||||
being zero-terminated. The \fBuse_length\fP modifier causes this to happen.
|
being zero-terminated. The \fBuse_length\fP modifier causes this to happen.
|
||||||
Using a length happens automatically (whether or not \fBuse_length\fP is set)
|
Using a length happens automatically (whether or not \fBuse_length\fP is set)
|
||||||
when \fBhex\fP is set, because patterns specified in hexadecimal may contain
|
when \fBhex\fP is set, because patterns specified in hexadecimal may contain
|
||||||
binary zeros.
|
binary zeros.
|
||||||
|
@ -696,17 +696,17 @@ Either single or double quotes may be used. There is no way of including
|
||||||
the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
|
the delimiter within a substring. The \fBhex\fP and \fBexpand\fP modifiers are
|
||||||
mutually exclusive.
|
mutually exclusive.
|
||||||
.P
|
.P
|
||||||
The POSIX API cannot be used with patterns specified in hexadecimal because
|
The POSIX API cannot be used with patterns specified in hexadecimal because
|
||||||
they may contain binary zeros, which conflicts with \fBregcomp()\fP's
|
they may contain binary zeros, which conflicts with \fBregcomp()\fP's
|
||||||
requirement for a zero-terminated string. Such patterns are always passed to
|
requirement for a zero-terminated string. Such patterns are always passed to
|
||||||
\fBpcre2_compile()\fP as a string with a length, not as zero-terminated.
|
\fBpcre2_compile()\fP as a string with a length, not as zero-terminated.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Specifying wide characters in 16-bit and 32-bit modes"
|
.SS "Specifying wide characters in 16-bit and 32-bit modes"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and
|
In 16-bit and 32-bit modes, all input is automatically treated as UTF-8 and
|
||||||
translated to UTF-16 or UTF-32 when the \fButf\fP modifier is set. For testing
|
translated to UTF-16 or UTF-32 when the \fButf\fP modifier is set. For testing
|
||||||
the 16-bit and 32-bit libraries in non-UTF mode, the \fButf8_input\fP modifier
|
the 16-bit and 32-bit libraries in non-UTF mode, the \fButf8_input\fP modifier
|
||||||
can be used. It is mutually exclusive with \fButf\fP. Input lines are
|
can be used. It is mutually exclusive with \fButf\fP. Input lines are
|
||||||
interpreted as UTF-8 as a means of specifying wide characters. More details are
|
interpreted as UTF-8 as a means of specifying wide characters. More details are
|
||||||
|
@ -1134,15 +1134,15 @@ does no capturing); it is ignored, with a warning message, if present.
|
||||||
.sp
|
.sp
|
||||||
A callout function is supplied when \fBpcre2test\fP calls the library matching
|
A callout function is supplied when \fBpcre2test\fP calls the library matching
|
||||||
functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
|
functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
|
||||||
set, the current captured groups are output when a callout occurs. The default
|
set, the current captured groups are output when a callout occurs. The default
|
||||||
return from the callout function is zero, which allows matching to continue.
|
return from the callout function is zero, which allows matching to continue.
|
||||||
.P
|
.P
|
||||||
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
|
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 (causing matching to backtrack)
|
only one number, 1 is returned instead of 0 (causing matching to backtrack)
|
||||||
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
|
when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
|
||||||
is returned when callout <n> is reached and there have been at least <m>
|
is returned when callout <n> is reached and there have been at least <m>
|
||||||
callouts. The \fBcallout_error\fP modifier is similar, except that
|
callouts. The \fBcallout_error\fP modifier is similar, except that
|
||||||
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
|
PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be
|
||||||
aborted. If both these modifiers are set for the same callout number,
|
aborted. If both these modifiers are set for the same callout number,
|
||||||
\fBcallout_error\fP takes precedence.
|
\fBcallout_error\fP takes precedence.
|
||||||
.P
|
.P
|
||||||
|
|
|
@ -116,7 +116,11 @@ COMMAND LINE OPTIONS
|
||||||
is the default. If the 32-bit library has not been built,
|
is the default. If the 32-bit library has not been built,
|
||||||
this option causes an error.
|
this option causes an error.
|
||||||
|
|
||||||
-b Behave as if each pattern has the /fullbincode modifier; the
|
-ac Behave as if each pattern has the auto_callout modifier, that
|
||||||
|
is, insert automatic callouts into every pattern that is com-
|
||||||
|
piled.
|
||||||
|
|
||||||
|
-b Behave as if each pattern has the fullbincode modifier; the
|
||||||
full internal binary form of the pattern is output after com-
|
full internal binary form of the pattern is output after com-
|
||||||
pilation.
|
pilation.
|
||||||
|
|
||||||
|
@ -174,7 +178,7 @@ COMMAND LINE OPTIONS
|
||||||
|
|
||||||
-help Output a brief summary of these options and then exit.
|
-help Output a brief summary of these options and then exit.
|
||||||
|
|
||||||
-i Behave as if each pattern has the /info modifier; information
|
-i Behave as if each pattern has the info modifier; information
|
||||||
about the compiled pattern is given after compilation.
|
about the compiled pattern is given after compilation.
|
||||||
|
|
||||||
-jit Behave as if each pattern line has the jit modifier; after
|
-jit Behave as if each pattern line has the jit modifier; after
|
||||||
|
@ -724,7 +728,7 @@ PATTERN MODIFIERS
|
||||||
2 compile JIT code for soft partial matching
|
2 compile JIT code for soft partial matching
|
||||||
4 compile JIT code for hard partial matching
|
4 compile JIT code for hard partial matching
|
||||||
|
|
||||||
The possible values for the /jit modifier are therefore:
|
The possible values for the jit modifier are therefore:
|
||||||
|
|
||||||
0 disable JIT
|
0 disable JIT
|
||||||
1 normal matching only
|
1 normal matching only
|
||||||
|
@ -764,24 +768,24 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Setting a locale
|
Setting a locale
|
||||||
|
|
||||||
The /locale modifier must specify the name of a locale, for example:
|
The locale modifier must specify the name of a locale, for example:
|
||||||
|
|
||||||
/pattern/locale=fr_FR
|
/pattern/locale=fr_FR
|
||||||
|
|
||||||
The given locale is set, pcre2_maketables() is called to build a set of
|
The given locale is set, pcre2_maketables() is called to build a set of
|
||||||
character tables for the locale, and this is then passed to pcre2_com-
|
character tables for the locale, and this is then passed to pcre2_com-
|
||||||
pile() when compiling the regular expression. The same tables are used
|
pile() when compiling the regular expression. The same tables are used
|
||||||
when matching the following subject lines. The /locale modifier applies
|
when matching the following subject lines. The locale modifier applies
|
||||||
only to the pattern on which it appears, but can be given in a #pattern
|
only to the pattern on which it appears, but can be given in a #pattern
|
||||||
command if a default is needed. Setting a locale and alternate charac-
|
command if a default is needed. Setting a locale and alternate charac-
|
||||||
ter tables are mutually exclusive.
|
ter tables are mutually exclusive.
|
||||||
|
|
||||||
Showing pattern memory
|
Showing pattern memory
|
||||||
|
|
||||||
The /memory modifier causes the size in bytes of the memory used to
|
The memory modifier causes the size in bytes of the memory used to hold
|
||||||
hold the compiled pattern to be output. This does not include the size
|
the compiled pattern to be output. This does not include the size of
|
||||||
of the pcre2_code block; it is just the actual compiled data. If the
|
the pcre2_code block; it is just the actual compiled data. If the pat-
|
||||||
pattern is subsequently passed to the JIT compiler, the size of the JIT
|
tern is subsequently passed to the JIT compiler, the size of the JIT
|
||||||
compiled code is also output. Here is an example:
|
compiled code is also output. Here is an example:
|
||||||
|
|
||||||
re> /a(b)c/jit,memory
|
re> /a(b)c/jit,memory
|
||||||
|
@ -837,7 +841,7 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Testing the stack guard feature
|
Testing the stack guard feature
|
||||||
|
|
||||||
The /stackguard modifier is used to test the use of pcre2_set_com-
|
The stackguard modifier is used to test the use of pcre2_set_com-
|
||||||
pile_recursion_guard(), a function that is provided to enable stack
|
pile_recursion_guard(), a function that is provided to enable stack
|
||||||
availability to be checked during compilation (see the pcre2api docu-
|
availability to be checked during compilation (see the pcre2api docu-
|
||||||
mentation for details). If the number specified by the modifier is
|
mentation for details). If the number specified by the modifier is
|
||||||
|
@ -849,7 +853,7 @@ PATTERN MODIFIERS
|
||||||
|
|
||||||
Using alternative character tables
|
Using alternative character tables
|
||||||
|
|
||||||
The value specified for the /tables modifier must be one of the digits
|
The value specified for the tables modifier must be one of the digits
|
||||||
0, 1, or 2. It causes a specific set of built-in character tables to be
|
0, 1, or 2. It causes a specific set of built-in character tables to be
|
||||||
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
passed to pcre2_compile(). This is used in the PCRE2 tests to check be-
|
||||||
haviour with different character tables. The digit specifies the tables
|
haviour with different character tables. The digit specifies the tables
|
||||||
|
@ -931,7 +935,7 @@ SUBJECT MODIFIERS
|
||||||
The partial matching modifiers are provided with abbreviations because
|
The partial matching modifiers are provided with abbreviations because
|
||||||
they appear frequently in tests.
|
they appear frequently in tests.
|
||||||
|
|
||||||
If the /posix modifier was present on the pattern, causing the POSIX
|
If the posix modifier was present on the pattern, causing the POSIX
|
||||||
wrapper API to be used, the only option-setting modifiers that have any
|
wrapper API to be used, the only option-setting modifiers that have any
|
||||||
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
effect are notbol, notempty, and noteol, causing REG_NOTBOL,
|
||||||
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
REG_NOTEMPTY, and REG_NOTEOL, respectively, to be passed to regexec().
|
||||||
|
@ -951,6 +955,7 @@ SUBJECT MODIFIERS
|
||||||
altglobal alternative global matching
|
altglobal alternative global matching
|
||||||
callout_capture show captures at callout time
|
callout_capture show captures at callout time
|
||||||
callout_data=<n> set a value to pass via callouts
|
callout_data=<n> set a value to pass via callouts
|
||||||
|
callout_error=<n>[:<m>] control callout error
|
||||||
callout_fail=<n>[:<m>] control callout failure
|
callout_fail=<n>[:<m>] control callout failure
|
||||||
callout_none do not supply a callout function
|
callout_none do not supply a callout function
|
||||||
copy=<number or name> copy captured substring
|
copy=<number or name> copy captured substring
|
||||||
|
@ -1042,14 +1047,22 @@ SUBJECT MODIFIERS
|
||||||
|
|
||||||
A callout function is supplied when pcre2test calls the library match-
|
A callout function is supplied when pcre2test calls the library match-
|
||||||
ing functions, unless callout_none is specified. If callout_capture is
|
ing functions, unless callout_none is specified. If callout_capture is
|
||||||
set, the current captured groups are output when a callout occurs.
|
set, the current captured groups are output when a callout occurs. The
|
||||||
|
default return from the callout function is zero, which allows matching
|
||||||
|
to continue.
|
||||||
|
|
||||||
The callout_fail modifier can be given one or two numbers. If there is
|
The callout_fail modifier can be given one or two numbers. If there is
|
||||||
only one number, 1 is returned instead of 0 when a callout of that num-
|
only one number, 1 is returned instead of 0 (causing matching to back-
|
||||||
ber is reached. If two numbers are given, 1 is returned when callout
|
track) when a callout of that number is reached. If two numbers
|
||||||
<n> is reached for the <m>th time. Note that callouts with string argu-
|
(<n>:<m>) are given, 1 is returned when callout <n> is reached and
|
||||||
ments are always given the number zero. See "Callouts" below for a
|
there have been at least <m> callouts. The callout_error modifier is
|
||||||
description of the output when a callout it taken.
|
similar, except that PCRE2_ERROR_CALLOUT is returned, causing the
|
||||||
|
entire matching process to be aborted. If both these modifiers are set
|
||||||
|
for the same callout number, callout_error takes precedence.
|
||||||
|
|
||||||
|
Note that callouts with string arguments are always given the number
|
||||||
|
zero. See "Callouts" below for a description of the output when a call-
|
||||||
|
out is taken.
|
||||||
|
|
||||||
The callout_data modifier can be given an unsigned or a negative num-
|
The callout_data modifier can be given an unsigned or a negative num-
|
||||||
ber. This is set as the "user data" that is passed to the matching
|
ber. This is set as the "user data" that is passed to the matching
|
||||||
|
@ -1060,7 +1073,7 @@ SUBJECT MODIFIERS
|
||||||
Finding all matches in a string
|
Finding all matches in a string
|
||||||
|
|
||||||
Searching for all possible matches within a subject can be requested by
|
Searching for all possible matches within a subject can be requested by
|
||||||
the global or /altglobal modifier. After finding a match, the matching
|
the global or altglobal modifier. After finding a match, the matching
|
||||||
function is called again to search the remainder of the subject. The
|
function is called again to search the remainder of the subject. The
|
||||||
difference between global and altglobal is that the former uses the
|
difference between global and altglobal is that the former uses the
|
||||||
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
start_offset argument to pcre2_match() or pcre2_dfa_match() to start
|
||||||
|
@ -1329,7 +1342,7 @@ DEFAULT OUTPUT FROM pcre2test
|
||||||
If the strings contain any non-printing characters, they are output as
|
If the strings contain any non-printing characters, they are output as
|
||||||
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
\xhh escapes if the value is less than 256 and UTF mode is not set.
|
||||||
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
Otherwise they are output as \x{hh...} escapes. See below for the defi-
|
||||||
nition of non-printing characters. If the /aftertext modifier is set,
|
nition of non-printing characters. If the aftertext modifier is set,
|
||||||
the output for substring 0 is followed by the rest of the subject
|
the output for substring 0 is followed by the rest of the subject
|
||||||
string, identified by "0+" like this:
|
string, identified by "0+" like this:
|
||||||
|
|
||||||
|
@ -1512,7 +1525,7 @@ NON-PRINTING CHARACTERS
|
||||||
|
|
||||||
When pcre2test is outputting text that is a matched part of a subject
|
When pcre2test is outputting text that is a matched part of a subject
|
||||||
string, it behaves in the same way, unless a different locale has been
|
string, it behaves in the same way, unless a different locale has been
|
||||||
set for the pattern (using the /locale modifier). In this case, the
|
set for the pattern (using the locale modifier). In this case, the
|
||||||
isprint() function is used to distinguish printing and non-printing
|
isprint() function is used to distinguish printing and non-printing
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
|
@ -1601,5 +1614,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 04 November 2016
|
Last updated: 28 December 2016
|
||||||
Copyright (c) 1997-2016 University of Cambridge.
|
Copyright (c) 1997-2016 University of Cambridge.
|
||||||
|
|
|
@ -78,6 +78,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to 1 if you have the <memory.h> header file. */
|
/* Define to 1 if you have the <memory.h> header file. */
|
||||||
/* #undef HAVE_MEMORY_H */
|
/* #undef HAVE_MEMORY_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `mkostemp' function. */
|
||||||
|
/* #undef HAVE_MKOSTEMP */
|
||||||
|
|
||||||
/* Define if you have POSIX threads libraries and header files. */
|
/* Define if you have POSIX threads libraries and header files. */
|
||||||
/* #undef HAVE_PTHREAD */
|
/* #undef HAVE_PTHREAD */
|
||||||
|
|
||||||
|
@ -90,6 +93,9 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to 1 if you have the <readline/readline.h> header file. */
|
/* Define to 1 if you have the <readline/readline.h> header file. */
|
||||||
/* #undef HAVE_READLINE_READLINE_H */
|
/* #undef HAVE_READLINE_READLINE_H */
|
||||||
|
|
||||||
|
/* Define to 1 if you have the `secure_getenv' function. */
|
||||||
|
/* #undef HAVE_SECURE_GETENV */
|
||||||
|
|
||||||
/* Define to 1 if you have the <stdint.h> header file. */
|
/* Define to 1 if you have the <stdint.h> header file. */
|
||||||
/* #undef HAVE_STDINT_H */
|
/* #undef HAVE_STDINT_H */
|
||||||
|
|
||||||
|
@ -308,9 +314,40 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to any value for valgrind support to find invalid memory reads. */
|
/* Define to any value for valgrind support to find invalid memory reads. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
|
/* Enable extensions on AIX 3, Interix. */
|
||||||
|
#ifndef _ALL_SOURCE
|
||||||
|
# define _ALL_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable GNU extensions on systems that have them. */
|
||||||
|
#ifndef _GNU_SOURCE
|
||||||
|
# define _GNU_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable threading extensions on Solaris. */
|
||||||
|
#ifndef _POSIX_PTHREAD_SEMANTICS
|
||||||
|
# define _POSIX_PTHREAD_SEMANTICS 1
|
||||||
|
#endif
|
||||||
|
/* Enable extensions on HP NonStop. */
|
||||||
|
#ifndef _TANDEM_SOURCE
|
||||||
|
# define _TANDEM_SOURCE 1
|
||||||
|
#endif
|
||||||
|
/* Enable general extensions on Solaris. */
|
||||||
|
#ifndef __EXTENSIONS__
|
||||||
|
# define __EXTENSIONS__ 1
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "10.23-RC1"
|
#define VERSION "10.23-RC1"
|
||||||
|
|
||||||
|
/* Define to 1 if on MINIX. */
|
||||||
|
/* #undef _MINIX */
|
||||||
|
|
||||||
|
/* Define to 2 if the system does not provide POSIX.1 features except with
|
||||||
|
this defined. */
|
||||||
|
/* #undef _POSIX_1_SOURCE */
|
||||||
|
|
||||||
|
/* Define to 1 if you need to in order for `stat' and other things to work. */
|
||||||
|
/* #undef _POSIX_SOURCE */
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
|
||||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 23
|
#define PCRE2_MINOR 23
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2016-08-01
|
#define PCRE2_DATE 2017-01-16
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
|
|
@ -44,7 +44,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 23
|
#define PCRE2_MINOR 23
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC1
|
||||||
#define PCRE2_DATE 2016-08-01
|
#define PCRE2_DATE 2017-01-16
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
@ -465,7 +465,9 @@ PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
|
||||||
pcre2_code_free(pcre2_code *); \
|
pcre2_code_free(pcre2_code *); \
|
||||||
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
*pcre2_code_copy(const pcre2_code *);
|
*pcre2_code_copy(const pcre2_code *); \
|
||||||
|
PCRE2_EXP_DECL pcre2_code PCRE2_CALL_CONVENTION \
|
||||||
|
*pcre2_code_copy_with_tables(const pcre2_code *);
|
||||||
|
|
||||||
|
|
||||||
/* Functions that give information about a compiled pattern. */
|
/* Functions that give information about a compiled pattern. */
|
||||||
|
@ -629,6 +631,7 @@ pcre2_compile are called by application code. */
|
||||||
|
|
||||||
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
#define pcre2_callout_enumerate PCRE2_SUFFIX(pcre2_callout_enumerate_)
|
||||||
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
#define pcre2_code_copy PCRE2_SUFFIX(pcre2_code_copy_)
|
||||||
|
#define pcre2_code_copy_with_tables PCRE2_SUFFIX(pcre2_code_copy_with_tables_)
|
||||||
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
#define pcre2_code_free PCRE2_SUFFIX(pcre2_code_free_)
|
||||||
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
#define pcre2_compile PCRE2_SUFFIX(pcre2_compile_)
|
||||||
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
#define pcre2_compile_context_copy PCRE2_SUFFIX(pcre2_compile_context_copy_)
|
||||||
|
|
|
@ -1047,8 +1047,8 @@ but some compilers complain about an unreachable statement. */
|
||||||
/* Replaces single character iterations with their possessive alternatives
|
/* Replaces single character iterations with their possessive alternatives
|
||||||
if appropriate. This function modifies the compiled opcode! Hitting a
|
if appropriate. This function modifies the compiled opcode! Hitting a
|
||||||
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
non-existent opcode may indicate a bug in PCRE2, but it can also be caused if a
|
||||||
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
|
bad UTF string was compiled with PCRE2_NO_UTF_CHECK. The rec_limit catches
|
||||||
overly complicated or large patterns. In these cases, the check just stops,
|
overly complicated or large patterns. In these cases, the check just stops,
|
||||||
leaving the remainder of the pattern unpossessified.
|
leaving the remainder of the pattern unpossessified.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
|
|
|
@ -116,9 +116,11 @@ them will be able to (i.e. assume a 64-bit world). */
|
||||||
|
|
||||||
/* Function definitions to allow mutual recursion */
|
/* Function definitions to allow mutual recursion */
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
static unsigned int
|
static unsigned int
|
||||||
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, compile_block *,
|
add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t,
|
||||||
const uint32_t *, unsigned int);
|
compile_block *, const uint32_t *, unsigned int);
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||||||
|
@ -4381,16 +4383,14 @@ return n8; /* Number of 8-bit characters */
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SUPPORT_UNICODE
|
||||||
/*************************************************
|
/*************************************************
|
||||||
* Add a list of characters to a class (internal) *
|
* Add a list of characters to a class (internal) *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function is used for adding a list of case-equivalent characters to a
|
/* This function is used for adding a list of case-equivalent characters to a
|
||||||
class, and also for adding a list of horizontal or vertical whitespace. If the
|
class when in UTF mode. This function is called only from within
|
||||||
list is in order (which it should be), ranges of characters are detected and
|
add_to_class_internal(), with which it is mutually recursive.
|
||||||
handled appropriately. This function is called (sometimes recursively) only
|
|
||||||
from within the "add to class" set of functions. The external entry point is
|
|
||||||
add_list_to_class().
|
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
classbits the bit map for characters < 256
|
classbits the bit map for characters < 256
|
||||||
|
@ -4423,6 +4423,7 @@ while (p[0] < NOTACHAR)
|
||||||
}
|
}
|
||||||
return n8;
|
return n8;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -4459,8 +4460,11 @@ return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
|
||||||
* External entry point for add list to class *
|
* External entry point for add list to class *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function sets the overall range so that the internal functions can try
|
/* This function is used for adding a list of horizontal or vertical whitespace
|
||||||
to avoid duplication when handling case-independence.
|
characters to a class. The list must be in order so that ranges of characters
|
||||||
|
can be detected and handled appropriately. This function sets the overall range
|
||||||
|
so that the internal functions can try to avoid duplication when handling
|
||||||
|
case-independence.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
classbits the bit map for characters < 256
|
classbits the bit map for characters < 256
|
||||||
|
|
|
@ -3216,7 +3216,7 @@ if (mcontext == NULL)
|
||||||
{
|
{
|
||||||
mb->callout = NULL;
|
mb->callout = NULL;
|
||||||
mb->memctl = re->memctl;
|
mb->memctl = re->memctl;
|
||||||
mb->match_limit_recursion = PRIV(default_match_context).recursion_limit;
|
mb->match_limit_recursion = PRIV(default_match_context).recursion_limit;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3229,7 +3229,7 @@ else
|
||||||
mb->callout = mcontext->callout;
|
mb->callout = mcontext->callout;
|
||||||
mb->callout_data = mcontext->callout_data;
|
mb->callout_data = mcontext->callout_data;
|
||||||
mb->memctl = mcontext->memctl;
|
mb->memctl = mcontext->memctl;
|
||||||
mb->match_limit_recursion = mcontext->recursion_limit;
|
mb->match_limit_recursion = mcontext->recursion_limit;
|
||||||
}
|
}
|
||||||
if (mb->match_limit_recursion > re->limit_recursion)
|
if (mb->match_limit_recursion > re->limit_recursion)
|
||||||
mb->match_limit_recursion = re->limit_recursion;
|
mb->match_limit_recursion = re->limit_recursion;
|
||||||
|
|
|
@ -173,9 +173,9 @@ static const unsigned char compile_error_texts[] =
|
||||||
"regular expression is too complicated\0"
|
"regular expression is too complicated\0"
|
||||||
"lookbehind assertion is too long\0"
|
"lookbehind assertion is too long\0"
|
||||||
"pattern string is longer than the limit set by the application\0"
|
"pattern string is longer than the limit set by the application\0"
|
||||||
"internal error: unknown code in parsed pattern\0"
|
"internal error: unknown code in parsed pattern\0"
|
||||||
/* 90 */
|
/* 90 */
|
||||||
"internal error: bad code value in parsed_skip()\0"
|
"internal error: bad code value in parsed_skip()\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -648,7 +648,7 @@ typedef struct pcre2_real_match_data {
|
||||||
|
|
||||||
#ifndef PCRE2_PCRE2TEST
|
#ifndef PCRE2_PCRE2TEST
|
||||||
|
|
||||||
/* Structures for checking for mutual recursion when scanning compiled or
|
/* Structures for checking for mutual recursion when scanning compiled or
|
||||||
parsed code. */
|
parsed code. */
|
||||||
|
|
||||||
typedef struct recurse_check {
|
typedef struct recurse_check {
|
||||||
|
@ -702,7 +702,7 @@ typedef struct compile_block {
|
||||||
PCRE2_UCHAR *name_table; /* The name/number table */
|
PCRE2_UCHAR *name_table; /* The name/number table */
|
||||||
PCRE2_SIZE workspace_size; /* Size of workspace */
|
PCRE2_SIZE workspace_size; /* Size of workspace */
|
||||||
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
|
PCRE2_SIZE small_ref_offset[10]; /* Offsets for \1 to \9 */
|
||||||
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
PCRE2_SIZE erroroffset; /* Offset of error in pattern */
|
||||||
uint16_t names_found; /* Number of entries so far */
|
uint16_t names_found; /* Number of entries so far */
|
||||||
uint16_t name_entry_size; /* Size of each entry */
|
uint16_t name_entry_size; /* Size of each entry */
|
||||||
open_capitem *open_caps; /* Chain of open capture items */
|
open_capitem *open_caps; /* Chain of open capture items */
|
||||||
|
@ -711,16 +711,16 @@ typedef struct compile_block {
|
||||||
uint32_t external_options; /* External (initial) options */
|
uint32_t external_options; /* External (initial) options */
|
||||||
uint32_t external_flags; /* External flag bits to be set */
|
uint32_t external_flags; /* External flag bits to be set */
|
||||||
uint32_t bracount; /* Count of capturing parentheses */
|
uint32_t bracount; /* Count of capturing parentheses */
|
||||||
uint32_t lastcapture; /* Last capture encountered */
|
uint32_t lastcapture; /* Last capture encountered */
|
||||||
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
uint32_t *parsed_pattern; /* Parsed pattern buffer */
|
||||||
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
uint32_t *parsed_pattern_end; /* Parsed pattern should not get here */
|
||||||
uint32_t *groupinfo; /* Group info vector */
|
uint32_t *groupinfo; /* Group info vector */
|
||||||
uint32_t top_backref; /* Maximum back reference */
|
uint32_t top_backref; /* Maximum back reference */
|
||||||
uint32_t backref_map; /* Bitmap of low back refs */
|
uint32_t backref_map; /* Bitmap of low back refs */
|
||||||
uint32_t nltype; /* Newline type */
|
uint32_t nltype; /* Newline type */
|
||||||
uint32_t nllen; /* Newline string length */
|
uint32_t nllen; /* Newline string length */
|
||||||
uint32_t class_range_start; /* Overall class range start */
|
uint32_t class_range_start; /* Overall class range start */
|
||||||
uint32_t class_range_end; /* Overall class range end */
|
uint32_t class_range_end; /* Overall class range end */
|
||||||
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
PCRE2_UCHAR nl[4]; /* Newline string when fixed length */
|
||||||
int max_lookbehind; /* Maximum lookbehind (characters) */
|
int max_lookbehind; /* Maximum lookbehind (characters) */
|
||||||
int parens_depth; /* Depth of nested parentheses */
|
int parens_depth; /* Depth of nested parentheses */
|
||||||
|
|
|
@ -1937,6 +1937,8 @@ static int
|
||||||
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
fill_buffer(void *handle, int frtype, char *buffer, int length,
|
||||||
BOOL input_line_buffered)
|
BOOL input_line_buffered)
|
||||||
{
|
{
|
||||||
|
(void)frtype; /* Avoid warning when not used */
|
||||||
|
|
||||||
#ifdef SUPPORT_LIBZ
|
#ifdef SUPPORT_LIBZ
|
||||||
if (frtype == FR_LIBZ)
|
if (frtype == FR_LIBZ)
|
||||||
return gzread((gzFile)handle, buffer, length);
|
return gzread((gzFile)handle, buffer, length);
|
||||||
|
@ -3015,8 +3017,8 @@ sprintf((char *)buffer, "%s%.*s%s", prefix[popts], patlen, ps, suffix[popts]);
|
||||||
p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
|
p->compiled = pcre2_compile(buffer, PCRE2_ZERO_TERMINATED, options, &errcode,
|
||||||
&erroffset, compile_context);
|
&erroffset, compile_context);
|
||||||
|
|
||||||
/* Handle successful compile. Try JIT-compiling if supported and enabled. We
|
/* Handle successful compile. Try JIT-compiling if supported and enabled. We
|
||||||
ignore any JIT compiler errors, relying on falling back to interpreting if
|
ignore any JIT compiler errors, relying on falling back to interpreting if
|
||||||
anything goes wrong with JIT. */
|
anything goes wrong with JIT. */
|
||||||
|
|
||||||
if (p->compiled != NULL)
|
if (p->compiled != NULL)
|
||||||
|
@ -3577,10 +3579,10 @@ if (colour_option != NULL && strcmp(colour_option, "never") != 0)
|
||||||
if (cs == NULL) cs = getenv("PCREGREP_COLOR");
|
if (cs == NULL) cs = getenv("PCREGREP_COLOR");
|
||||||
if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
|
if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
|
||||||
if (cs == NULL) cs = getenv("GREP_COLOR");
|
if (cs == NULL) cs = getenv("GREP_COLOR");
|
||||||
if (cs != NULL)
|
if (cs != NULL)
|
||||||
{
|
{
|
||||||
if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
|
if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
|
||||||
}
|
}
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
init_colour_output();
|
init_colour_output();
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue