Add explicit heap limiting options to pcre2_match(), with associated features
for listing, configuring, etc.
This commit is contained in:
parent
f0126dc7ae
commit
14989bd454
|
@ -78,6 +78,7 @@
|
||||||
# fix by David Gaussmann
|
# fix by David Gaussmann
|
||||||
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||||
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||||
|
# 2017-04-08 PH added HEAP_LIMIT
|
||||||
|
|
||||||
PROJECT(PCRE2 C)
|
PROJECT(PCRE2 C)
|
||||||
|
|
||||||
|
@ -143,6 +144,9 @@ SET(PCRE2_LINK_SIZE "2" CACHE STRING
|
||||||
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
|
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
|
||||||
|
"Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
||||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||||
|
|
||||||
|
@ -765,6 +769,7 @@ IF(PCRE2_SHOW_REPORT)
|
||||||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
||||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
|
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
|
||||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
|
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||||
|
MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
|
||||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
|
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
|
||||||
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
||||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||||
|
|
|
@ -121,6 +121,11 @@ single-branch conditions with a false condition (e.g. DEFINE) at the start of a
|
||||||
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
|
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
|
||||||
anchored.
|
anchored.
|
||||||
|
|
||||||
|
22. Added an explicit limit on the amount of heap used by pcre2_match(), set by
|
||||||
|
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). Upgraded pcre2test to show the
|
||||||
|
heap limit along with other pattern information, and to find the minimum when
|
||||||
|
the find_limits modifier is set.
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
|
@ -69,6 +69,7 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2_set_character_tables.html \
|
doc/html/pcre2_set_character_tables.html \
|
||||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||||
doc/html/pcre2_set_depth_limit.html \
|
doc/html/pcre2_set_depth_limit.html \
|
||||||
|
doc/html/pcre2_set_heap_limit.html \
|
||||||
doc/html/pcre2_set_match_limit.html \
|
doc/html/pcre2_set_match_limit.html \
|
||||||
doc/html/pcre2_set_max_pattern_length.html \
|
doc/html/pcre2_set_max_pattern_length.html \
|
||||||
doc/html/pcre2_set_offset_limit.html \
|
doc/html/pcre2_set_offset_limit.html \
|
||||||
|
@ -152,6 +153,7 @@ dist_man_MANS = \
|
||||||
doc/pcre2_set_character_tables.3 \
|
doc/pcre2_set_character_tables.3 \
|
||||||
doc/pcre2_set_compile_recursion_guard.3 \
|
doc/pcre2_set_compile_recursion_guard.3 \
|
||||||
doc/pcre2_set_depth_limit.3 \
|
doc/pcre2_set_depth_limit.3 \
|
||||||
|
doc/pcre2_set_heap_limit.3 \
|
||||||
doc/pcre2_set_match_limit.3 \
|
doc/pcre2_set_match_limit.3 \
|
||||||
doc/pcre2_set_max_pattern_length.3 \
|
doc/pcre2_set_max_pattern_length.3 \
|
||||||
doc/pcre2_set_offset_limit.3 \
|
doc/pcre2_set_offset_limit.3 \
|
||||||
|
|
23
README
23
README
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--with-parens-nest-limit=500
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||||
when matching a pattern. If the limit is exceeded during a match, the match
|
it uses when matching a pattern with the Perl-compatible matching function.
|
||||||
fails. The default is ten million. You can change the default by setting, for
|
If the limit is exceeded during a match, the match fails. The default is ten
|
||||||
example,
|
million. You can change the default by setting, for example,
|
||||||
|
|
||||||
--with-match-limit=500000
|
--with-match-limit=500000
|
||||||
|
|
||||||
|
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
|
||||||
pcre2api man page (search for pcre2_set_match_limit).
|
pcre2api man page (search for pcre2_set_match_limit).
|
||||||
|
|
||||||
. There is a separate counter that limits the depth of nested backtracking
|
. There is a separate counter that limits the depth of nested backtracking
|
||||||
during a matching process, which in turn limits the amount of memory that is
|
during a matching process, which indirectly limits the amount of heap memory
|
||||||
used. This also has a default of ten million, which is essentially
|
that is used. This also has a default of ten million, which is essentially
|
||||||
"unlimited". You can change the default by setting, for example,
|
"unlimited". You can change the default by setting, for example,
|
||||||
|
|
||||||
--with-match-limit-depth=5000
|
--with-match-limit-depth=5000
|
||||||
|
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
|
||||||
There is more discussion in the pcre2api man page (search for
|
There is more discussion in the pcre2api man page (search for
|
||||||
pcre2_set_depth_limit).
|
pcre2_set_depth_limit).
|
||||||
|
|
||||||
|
. You can also set an explicit limit on the amount of heap memory used by
|
||||||
|
the pcre2_match() interpreter:
|
||||||
|
|
||||||
|
--with-heap-limit=500
|
||||||
|
|
||||||
|
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||||
|
(which has its own memory control features) is used. There is more discussion
|
||||||
|
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||||
|
|
||||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 17 March 2017
|
Last updated: 11 April 2017
|
||||||
|
|
2
RunTest
2
RunTest
|
@ -489,7 +489,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
for opt in "" $jitopt; do
|
for opt in "" $jitopt; do
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||||
if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||||
checkresult $? 2 "$opt"
|
checkresult $? 2 "$opt"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
|
@ -36,6 +36,7 @@
|
||||||
#cmakedefine NEVER_BACKSLASH_C 1
|
#cmakedefine NEVER_BACKSLASH_C 1
|
||||||
|
|
||||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||||
|
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||||
|
|
25
configure.ac
25
configure.ac
|
@ -263,6 +263,12 @@ AC_ARG_WITH(parens-nest-limit,
|
||||||
[nested parentheses limit (default=250)]),
|
[nested parentheses limit (default=250)]),
|
||||||
, with_parens_nest_limit=250)
|
, with_parens_nest_limit=250)
|
||||||
|
|
||||||
|
# Handle --with-heap-limit=N
|
||||||
|
AC_ARG_WITH(heap-limit,
|
||||||
|
AS_HELP_STRING([--with-heap-limit=N],
|
||||||
|
[default limit on heap memory (kilobytes, default=20000000)]),
|
||||||
|
, with_heap_limit=20000000)
|
||||||
|
|
||||||
# Handle --with-match-limit=N
|
# Handle --with-match-limit=N
|
||||||
AC_ARG_WITH(match-limit,
|
AC_ARG_WITH(match-limit,
|
||||||
AS_HELP_STRING([--with-match-limit=N],
|
AS_HELP_STRING([--with-match-limit=N],
|
||||||
|
@ -680,12 +686,12 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
||||||
stack that is used while compiling a pattern.])
|
stack that is used while compiling a pattern.])
|
||||||
|
|
||||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||||
The value of MATCH_LIMIT determines the default number of times the internal
|
The value of MATCH_LIMIT determines the default number of times the
|
||||||
match() function can record a backtrack position during a single matching
|
pcre2_match() function can record a backtrack position during a single
|
||||||
attempt. There is a runtime interface for setting a different limit. The
|
matching attempt. There is a runtime interface for setting a different limit.
|
||||||
limit exists in order to catch runaway regular expressions that take for ever
|
The limit exists in order to catch runaway regular expressions that take for
|
||||||
to determine that they do not match. The default is set very large so that it
|
ever to determine that they do not match. The default is set very large so
|
||||||
does not accidentally catch legitimate cases.])
|
that it does not accidentally catch legitimate cases.])
|
||||||
|
|
||||||
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||||
|
|
||||||
|
@ -694,7 +700,7 @@ cat <<EOF
|
||||||
|
|
||||||
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||||
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||||
will be used.
|
will be used. See also --with-heap-limit.
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||||
|
@ -711,6 +717,10 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
||||||
be less than the value of MATCH_LIMIT. The default is to use the same value
|
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||||
as MATCH_LIMIT. There is a runtime method for setting a different limit.])
|
as MATCH_LIMIT. There is a runtime method for setting a different limit.])
|
||||||
|
|
||||||
|
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||||
|
This limits the amount of memory that pcre2_match() may use while matching
|
||||||
|
a pattern. The value is in kilobytes.])
|
||||||
|
|
||||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||||
This limit is parameterized just in case anybody ever wants to
|
This limit is parameterized just in case anybody ever wants to
|
||||||
change it. Care must be taken if it is increased, because it guards
|
change it. Care must be taken if it is increased, because it guards
|
||||||
|
@ -971,6 +981,7 @@ $PACKAGE-$VERSION configuration summary:
|
||||||
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||||
Internal link size ................. : ${with_link_size}
|
Internal link size ................. : ${with_link_size}
|
||||||
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||||
|
Heap limit ......................... : ${with_heap_limit} kilobytes
|
||||||
Match limit ........................ : ${with_match_limit}
|
Match limit ........................ : ${with_match_limit}
|
||||||
Match depth limit .................. : ${with_match_limit_depth}
|
Match depth limit .................. : ${with_match_limit_depth}
|
||||||
Build shared libs .................. : ${enable_shared}
|
Build shared libs .................. : ${enable_shared}
|
||||||
|
|
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
||||||
|
|
||||||
--with-parens-nest-limit=500
|
--with-parens-nest-limit=500
|
||||||
|
|
||||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||||
when matching a pattern. If the limit is exceeded during a match, the match
|
it uses when matching a pattern with the Perl-compatible matching function.
|
||||||
fails. The default is ten million. You can change the default by setting, for
|
If the limit is exceeded during a match, the match fails. The default is ten
|
||||||
example,
|
million. You can change the default by setting, for example,
|
||||||
|
|
||||||
--with-match-limit=500000
|
--with-match-limit=500000
|
||||||
|
|
||||||
|
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
|
||||||
pcre2api man page (search for pcre2_set_match_limit).
|
pcre2api man page (search for pcre2_set_match_limit).
|
||||||
|
|
||||||
. There is a separate counter that limits the depth of nested backtracking
|
. There is a separate counter that limits the depth of nested backtracking
|
||||||
during a matching process, which in turn limits the amount of memory that is
|
during a matching process, which indirectly limits the amount of heap memory
|
||||||
used. This also has a default of ten million, which is essentially
|
that is used. This also has a default of ten million, which is essentially
|
||||||
"unlimited". You can change the default by setting, for example,
|
"unlimited". You can change the default by setting, for example,
|
||||||
|
|
||||||
--with-match-limit-depth=5000
|
--with-match-limit-depth=5000
|
||||||
|
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
|
||||||
There is more discussion in the pcre2api man page (search for
|
There is more discussion in the pcre2api man page (search for
|
||||||
pcre2_set_depth_limit).
|
pcre2_set_depth_limit).
|
||||||
|
|
||||||
|
. You can also set an explicit limit on the amount of heap memory used by
|
||||||
|
the pcre2_match() interpreter:
|
||||||
|
|
||||||
|
--with-heap-limit=500
|
||||||
|
|
||||||
|
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||||
|
(which has its own memory control features) is used. There is more discussion
|
||||||
|
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||||
|
|
||||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 17 March 2017
|
Last updated: 11 April 2017
|
||||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||||
<td> Set the match backtracking depth limit</td></tr>
|
<td> Set the match backtracking depth limit</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||||
|
<td> Set the match backtracking heap limit</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||||
<td> Set the match limit</td></tr>
|
<td> Set the match limit</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@ point to a uint32_t integer variable. The available codes are:
|
||||||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||||
PCRE2_BSR_UNICODE
|
PCRE2_BSR_UNICODE
|
||||||
PCRE2_BSR_ANYCRLF
|
PCRE2_BSR_ANYCRLF
|
||||||
|
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
|
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
|
||||||
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
|
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
|
||||||
|
|
|
@ -44,6 +44,7 @@ A match context is needed only if you want to:
|
||||||
<pre>
|
<pre>
|
||||||
Set up a callout function
|
Set up a callout function
|
||||||
Set a matching offset limit
|
Set a matching offset limit
|
||||||
|
Change the heap memory limit
|
||||||
Change the backtracking match limit
|
Change the backtracking match limit
|
||||||
Change the backtracking depth limit
|
Change the backtracking depth limit
|
||||||
Set custom memory management specifically for the match
|
Set custom memory management specifically for the match
|
||||||
|
|
|
@ -51,6 +51,7 @@ request are as follows:
|
||||||
PCRE2_INFO_FRAMESIZE Size of backtracking frame
|
PCRE2_INFO_FRAMESIZE Size of backtracking frame
|
||||||
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
|
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
|
||||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
|
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
|
||||||
|
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
|
||||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||||
|
|
|
@ -182,6 +182,10 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> uint32_t <i>value</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -793,6 +797,7 @@ A match context is required if you want to:
|
||||||
<pre>
|
<pre>
|
||||||
Set up a callout function
|
Set up a callout function
|
||||||
Set an offset limit for matching an unanchored pattern
|
Set an offset limit for matching an unanchored pattern
|
||||||
|
Change the limit on the amount of heap used when matching
|
||||||
Change the backtracking match limit
|
Change the backtracking match limit
|
||||||
Change the backtracking depth limit
|
Change the backtracking depth limit
|
||||||
Set custom memory management specifically for the match
|
Set custom memory management specifically for the match
|
||||||
|
@ -851,14 +856,47 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
|
||||||
start within the first line of the subject. If this is set with an offset
|
start within the first line of the subject. If this is set with an offset
|
||||||
limit, a match must occur in the first line and also within the offset limit.
|
limit, a match must occur in the first line and also within the offset limit.
|
||||||
In other words, whichever limit comes first is used.
|
In other words, whichever limit comes first is used.
|
||||||
|
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
|
<b> uint32_t <i>value</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
|
||||||
|
amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
|
||||||
|
information when running an interpretive match. This limit does not apply to
|
||||||
|
matching with the JIT optimization, which has its own memory control
|
||||||
|
arrangements (see the
|
||||||
|
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||||
|
documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
|
||||||
|
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||||
|
returned. The default limit is set when PCRE2 is built; unless changed at build time, it is
|
||||||
|
very large and is essentially "unlimited".
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
A value for the heap limit may also be supplied by an item at the start of a
|
||||||
|
pattern of the form
|
||||||
|
<pre>
|
||||||
|
(*LIMIT_HEAP=ddd)
|
||||||
|
</pre>
|
||||||
|
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||||
|
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||||
|
limit is set, less than the default.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||||
|
stack for recording backtracking points. The more nested backtracking points
|
||||||
|
there are (that is, the deeper the search tree), the more memory is needed.
|
||||||
|
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||||
|
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||||
|
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||||
|
can be successfully processed.
|
||||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
|
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
|
||||||
up too many resources when processing patterns that are not going to match, but
|
up too many computing resources when processing patterns that are not going to
|
||||||
which have a very large number of possibilities in their search trees. The
|
match, but which have a very large number of possibilities in their search
|
||||||
classic example is a pattern that uses nested unlimited repeats.
|
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is an internal counter in <b>pcre2_match()</b> that is incremented each
|
There is an internal counter in <b>pcre2_match()</b> that is incremented each
|
||||||
|
@ -895,16 +933,20 @@ limit is set, less than the default.
|
||||||
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
||||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||||
to remember the state of matching at that point. Thus, this parameter
|
to remember the state of matching at that point. Thus, this parameter
|
||||||
indirectly limits the amount of memory that is used in a match.
|
indirectly limits the amount of memory that is used in a match. However,
|
||||||
|
because the size of each memory "frame" depends on the number of capturing
|
||||||
|
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||||
|
was more useful in versions before 10.30, where function recursion was used for
|
||||||
|
backtracking.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
The depth limit is not relevant, and is ignored, when matching is done using
|
||||||
compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which uses
|
JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
|
||||||
it to limit the depth of internal recursive function calls that implement
|
uses it to limit the depth of internal recursive function calls that implement
|
||||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||||
limit on the amount of system stack that is used. A recursive pattern such as
|
therefore, an indirect limit on the amount of system stack that is used. A
|
||||||
/(.)(?1)/, when matched to a very long string using <b>pcre2_dfa_match()</b>,
|
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||||
can use a great deal of stack.
|
<b>pcre2_dfa_match()</b>, can use a great deal of stack.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The default value for the depth limit can be set when PCRE2 is built; the
|
The default value for the depth limit can be set when PCRE2 is built; the
|
||||||
|
@ -958,6 +1000,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
||||||
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
|
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
|
||||||
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
|
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
|
||||||
<b>pcre2_set_depth_limit()</b> above.
|
<b>pcre2_set_depth_limit()</b> above.
|
||||||
|
<pre>
|
||||||
|
PCRE2_CONFIG_HEAPLIMIT
|
||||||
|
</pre>
|
||||||
|
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||||
|
for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
|
||||||
|
given with <b>pcre2_set_heap_limit()</b> above.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_JIT
|
PCRE2_CONFIG_JIT
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1786,6 +1834,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
||||||
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
|
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
|
||||||
explicit match is either a literal CR or LF character, or \r or \n or one of
|
explicit match is either a literal CR or LF character, or \r or \n or one of
|
||||||
the equivalent hexadecimal or octal escape sequences.
|
the equivalent hexadecimal or octal escape sequences.
|
||||||
|
<pre>
|
||||||
|
PCRE2_INFO_HEAPLIMIT
|
||||||
|
</pre>
|
||||||
|
If the pattern set a heap memory limit by including an item of the form
|
||||||
|
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||||
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
|
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_JCHANGED
|
PCRE2_INFO_JCHANGED
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2554,7 +2609,8 @@ The backtracking match limit was reached.
|
||||||
</pre>
|
</pre>
|
||||||
If a pattern contains many nested backtracking points, heap memory is used to
|
If a pattern contains many nested backtracking points, heap memory is used to
|
||||||
remember them. This error is given when the memory allocation function (default
|
remember them. This error is given when the memory allocation function (default
|
||||||
or custom) fails.
|
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||||
|
if the amount of memory needed exceeds the heap limit.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -3271,7 +3327,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 April 2017
|
Last updated: 11 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -265,17 +265,41 @@ to the <b>configure</b> command. This setting has no effect on the
|
||||||
(though the counting is done differently).
|
(though the counting is done differently).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In some environments it is desirable to limit the depth of nested backtracking
|
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||||
in order to restrict the maximum amount of heap memory that is used. A second
|
stack to record backtracking points. The more nested backtracking points there
|
||||||
limit controls this; it defaults to the value that is set for
|
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||||
|
which is specified in kilobytes. The limit can be changed at run time, as
|
||||||
|
described in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
documentation. The default limit (in effect unlimited) is 20 million. You can
|
||||||
|
change this by a setting such as
|
||||||
|
<pre>
|
||||||
|
--with-heap-limit=500
|
||||||
|
</pre>
|
||||||
|
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||||
|
interpretive matching in pcre2_match(). It does not apply when JIT (which has
|
||||||
|
its own memory arrangements) is used, nor does it apply to
|
||||||
|
<b>pcre2_dfa_match()</b>.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
You can also explicitly limit the depth of nested backtracking in the
|
||||||
|
<b>pcre2_match()</b> interpreter. This limit defaults to the value that is set
|
||||||
|
for --with-match-limit. You can set a lower default limit by adding, for
|
||||||
|
example,
|
||||||
<pre>
|
<pre>
|
||||||
--with-match-limit-depth=10000
|
--with-match-limit-depth=10000
|
||||||
</pre>
|
</pre>
|
||||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
to the <b>configure</b> command. This value can be overridden at run time. This
|
||||||
As well as applying to <b>pcre2_match()</b>, this limit also controls the depth
|
depth limit indirectly limits the amount of heap memory that is used, but
|
||||||
of recursive function calls in <b>pcre2_dfa_match()</b>. These are used for
|
because the size of each backtracking "frame" depends on the number of
|
||||||
lookaround assertions, atomic groups, and recursion within patterns.
|
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||||
|
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||||
|
versions before 10.30, where function recursion was used for backtracking.
|
||||||
|
However, as well as applying to <b>pcre2_match()</b>, this limit also controls
|
||||||
|
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
||||||
|
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||||
|
The limit does not apply to JIT matching.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -530,7 +554,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 March 2017
|
Last updated: 10 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -404,6 +404,10 @@ file name is followed by a colon; for context lines, a hyphen separator is used.
|
||||||
If a line number is also being output, it follows the file name.
|
If a line number is also being output, it follows the file name.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>--heap-limit</b>=<i>number</i>
|
||||||
|
See <b>--match-limit</b> below.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>--help</b>
|
<b>--help</b>
|
||||||
Output a help message, giving brief details of the command options and file
|
Output a help message, giving brief details of the command options and file
|
||||||
type support, and then exit. Anything else on the command line is
|
type support, and then exit. Anything else on the command line is
|
||||||
|
@ -505,7 +509,7 @@ used. There is no short form for this option.
|
||||||
<b>--match-limit</b>=<i>number</i>
|
<b>--match-limit</b>=<i>number</i>
|
||||||
Processing some regular expression patterns may take a very long time to search
|
Processing some regular expression patterns may take a very long time to search
|
||||||
for all possible matching strings. Others may require a very large amount of
|
for all possible matching strings. Others may require a very large amount of
|
||||||
memory. There are two options that set resource limits for matching.
|
memory. There are three options that set resource limits for matching.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The <b>--match-limit</b> option provides a means of limiting computing resource
|
The <b>--match-limit</b> option provides a means of limiting computing resource
|
||||||
|
@ -516,13 +520,24 @@ counter that is incremented each time around its main processing loop. If the
|
||||||
value set by <b>--match-limit</b> is reached, an error occurs.
|
value set by <b>--match-limit</b> is reached, an error occurs.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
|
||||||
|
of heap memory that may be used for matching. Heap memory is needed only if
|
||||||
|
matching the pattern requires a significant number of nested backtracking
|
||||||
|
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||||
|
heap memory altogether.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
||||||
which in turn limits the amount of memory that is used. This limit is of use
|
which indirectly limits the amount of memory that is used. The amount of memory
|
||||||
only if it is set smaller than <b>--match-limit</b>.
|
needed for each backtracking point depends on the number of capturing
|
||||||
|
parentheses in the pattern, so the amount of memory that is used before this
|
||||||
|
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||||
|
set smaller than <b>--match-limit</b>.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
There are no short forms for these options. The default settings are specified
|
There are no short forms for these options. The default settings are specified
|
||||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
when the PCRE2 library is compiled, with the default defaults being very large
|
||||||
|
and so effectively unlimited.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>--max-buffer-size</b>=<i>number</i>
|
<b>--max-buffer-size</b>=<i>number</i>
|
||||||
|
@ -764,11 +779,12 @@ Many of the short and long forms of <b>pcre2grep</b>'s options are the same
|
||||||
as in the GNU <b>grep</b> program. Any long option of the form
|
as in the GNU <b>grep</b> program. Any long option of the form
|
||||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||||
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
||||||
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
<b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
|
||||||
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
<b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
|
||||||
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
|
<b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
|
||||||
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
|
<b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
|
||||||
<b>--only-matching</b> option with a capturing parentheses number.
|
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
|
||||||
|
capturing parentheses number.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Although most of the common options work the same way, a few are different in
|
Although most of the common options work the same way, a few are different in
|
||||||
|
@ -891,9 +907,9 @@ there are more than 20 such errors, <b>pcre2grep</b> gives up.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
|
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
|
||||||
overall resource limit; there is a second option called <b>--depth-limit</b>
|
overall resource limit. There are also other limits that affect the amount of
|
||||||
that sets a limit on the amount of memory that is used (see the discussion of
|
memory used during matching; see the discussion of <b>--heap-limit</b> and
|
||||||
these options above).
|
<b>--depth-limit</b> above.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
|
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -918,7 +934,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 06 April 2017
|
Last updated: 11 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -170,14 +170,15 @@ the application to apply the JIT optimization by calling
|
||||||
<b>pcre2_jit_compile()</b> is ignored.
|
<b>pcre2_jit_compile()</b> is ignored.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Setting match and backtracking depth limits
|
Setting match resource limits
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The pcre2_match() function contains a counter that is incremented every time it
|
The pcre2_match() function contains a counter that is incremented every time it
|
||||||
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
|
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
|
||||||
this counter, which therefore limits the amount of computing resource used for
|
this counter, which therefore limits the amount of computing resource used for
|
||||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
a match. The maximum depth of nested backtracking can also be limited; this
|
||||||
restricts the amount of heap memory that is used.
|
indirectly restricts the amount of heap memory that is used, but there is also
|
||||||
|
an explicit memory limit that can be set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
These facilities are provided to catch runaway matches that are provoked by
|
These facilities are provided to catch runaway matches that are provoked by
|
||||||
|
@ -186,6 +187,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
||||||
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
|
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
|
||||||
can also be set by items at the start of the pattern of the form
|
can also be set by items at the start of the pattern of the form
|
||||||
<pre>
|
<pre>
|
||||||
|
(*LIMIT_HEAP=d)
|
||||||
(*LIMIT_MATCH=d)
|
(*LIMIT_MATCH=d)
|
||||||
(*LIMIT_DEPTH=d)
|
(*LIMIT_DEPTH=d)
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -200,11 +202,13 @@ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
||||||
still recognized for backwards compatibility.
|
still recognized for backwards compatibility.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The match limit is used (but in a different way) when JIT is being used, but it
|
The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
|
||||||
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||||
However, the depth limit is relevant for DFA matching, which uses function
|
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||||
recursion for recursions within the pattern. In this case, the depth limit
|
ignored, when matching with <b>pcre2_dfa_match()</b>. The depth limit is ignored
|
||||||
controls the amount of system stack that is used.
|
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||||
|
recursions within the pattern. In this case, the depth limit controls the
|
||||||
|
amount of system stack that is used.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Newline conventions
|
Newline conventions
|
||||||
|
@ -3434,7 +3438,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 April 2017
|
Last updated: 11 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -83,11 +83,12 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
|
||||||
uses very little system stack at run time. In earlier releases recursive
|
uses very little system stack at run time. In earlier releases recursive
|
||||||
function calls could use a great deal of stack, and this could cause problems,
|
function calls could use a great deal of stack, and this could cause problems,
|
||||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||||
frames is allocated on the system stack (enough for about 50 frames for small
|
frames is allocated on the system stack (enough for about 100 frames for small
|
||||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||||
to be time-efficient, as described below, may also reduce the memory
|
memory can be limited; if the limit is set to zero, only the initial stack
|
||||||
requirements.
|
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||||
|
may also reduce the memory requirements.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
||||||
|
@ -243,7 +244,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 31 March 2017
|
Last updated: 08 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -235,6 +235,12 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
|
||||||
compilation, each pattern is passed to the just-in-time compiler, if available.
|
compilation, each pattern is passed to the just-in-time compiler, if available.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
<b>-jitverify</b>
|
||||||
|
Behave as if each pattern line has the <b>jitverify</b> modifier; after
|
||||||
|
successful compilation, each pattern is passed to the just-in-time compiler, if
|
||||||
|
available, and the use of JIT is verified.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
<b>-pattern</b> <i>modifier-list</i>
|
<b>-pattern</b> <i>modifier-list</i>
|
||||||
Behave as if each pattern line contains the given modifiers.
|
Behave as if each pattern line contains the given modifiers.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1088,6 +1094,7 @@ pattern.
|
||||||
get=<number or name> extract captured substring
|
get=<number or name> extract captured substring
|
||||||
getall extract all captured substrings
|
getall extract all captured substrings
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
heap_limit=<n> set a limit on heap memory
|
||||||
jitstack=<n> set size of JIT stack
|
jitstack=<n> set size of JIT stack
|
||||||
mark show mark values
|
mark show mark values
|
||||||
match_limit=<n> set a match limit
|
match_limit=<n> set a match limit
|
||||||
|
@ -1330,11 +1337,11 @@ stack that is larger than the default 32K is necessary only for very
|
||||||
complicated patterns.
|
complicated patterns.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Setting match and depth limits
|
Setting heap, match, and depth limits
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The <b>match_limit</b> and <b>depth_limit</b> modifiers set the appropriate
|
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
|
||||||
limits in the match context. These values are ignored when the
|
the appropriate limits in the match context. These values are ignored when the
|
||||||
<b>find_limits</b> modifier is specified.
|
<b>find_limits</b> modifier is specified.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -1343,8 +1350,8 @@ Finding minimum limits
|
||||||
<P>
|
<P>
|
||||||
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
|
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
|
||||||
calls the relevant matching function several times, setting different values in
|
calls the relevant matching function several times, setting different values in
|
||||||
the match context via <b>pcre2_set_match_limit()</b> or
|
the match context via <b>pcre2_set_heap_limit()</b>, <b>pcre2_set_match_limit()</b>,
|
||||||
<b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
||||||
parameter that allows the match to complete without error.
|
parameter that allows the match to complete without error.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1360,8 +1367,8 @@ increasing length of subject string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
|
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
|
||||||
much memory for recording backtracking points is needed to complete the match
|
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||||
attempt. In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
||||||
recursive calls of the internal function that is used for handling pattern
|
recursive calls of the internal function that is used for handling pattern
|
||||||
recursion, lookaround assertions, and atomic groups.
|
recursion, lookaround assertions, and atomic groups.
|
||||||
</P>
|
</P>
|
||||||
|
@ -1800,7 +1807,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 04 April 2017
|
Last updated: 11 April 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||||
<td> Set the match backtracking depth limit</td></tr>
|
<td> Set the match backtracking depth limit</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||||
|
<td> Set the match backtracking heap limit</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||||
<td> Set the match limit</td></tr>
|
<td> Set the match limit</td></tr>
|
||||||
|
|
||||||
|
|
2160
doc/pcre2.txt
2160
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_CONFIG 3 "24 March 2017" "PCRE2 10.30"
|
.TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -31,6 +31,7 @@ point to a uint32_t integer variable. The available codes are:
|
||||||
PCRE2_CONFIG_BSR Indicates what \eR matches by default:
|
PCRE2_CONFIG_BSR Indicates what \eR matches by default:
|
||||||
PCRE2_BSR_UNICODE
|
PCRE2_BSR_UNICODE
|
||||||
PCRE2_BSR_ANYCRLF
|
PCRE2_BSR_ANYCRLF
|
||||||
|
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30"
|
.TH PCRE2_MATCH 3 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -32,6 +32,7 @@ A match context is needed only if you want to:
|
||||||
.sp
|
.sp
|
||||||
Set up a callout function
|
Set up a callout function
|
||||||
Set a matching offset limit
|
Set a matching offset limit
|
||||||
|
Change the heap memory limit
|
||||||
Change the backtracking match limit
|
Change the backtracking match limit
|
||||||
Change the backtracking depth limit
|
Change the backtracking depth limit
|
||||||
Set custom memory management specifically for the match
|
Set custom memory management specifically for the match
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_PATTERN_INFO 3 "25 March 2017" "PCRE2 10.30"
|
.TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -43,6 +43,9 @@ request are as follows:
|
||||||
.\" JOIN
|
.\" JOIN
|
||||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
|
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
|
||||||
exist in the pattern
|
exist in the pattern
|
||||||
|
.\" JOIN
|
||||||
|
PCRE2_INFO_HEAPLIMIT Heap memory limit if set,
|
||||||
|
otherwise PCRE2_ERROR_UNSET
|
||||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " uint32_t \fIvalue\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function sets the backtracking heap limit field in a match context. The
|
||||||
|
result is always zero.
|
||||||
|
.P
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30"
|
.TH PCRE2API 3 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -123,6 +123,9 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " uint32_t \fIvalue\fP);"
|
||||||
|
.sp
|
||||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
.B " uint32_t \fIvalue\fP);"
|
.B " uint32_t \fIvalue\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
@ -753,6 +756,7 @@ A match context is required if you want to:
|
||||||
.sp
|
.sp
|
||||||
Set up a callout function
|
Set up a callout function
|
||||||
Set an offset limit for matching an unanchored pattern
|
Set an offset limit for matching an unanchored pattern
|
||||||
|
Change the limit on the amount of heap used when matching
|
||||||
Change the backtracking match limit
|
Change the backtracking match limit
|
||||||
Change the backtracking depth limit
|
Change the backtracking depth limit
|
||||||
Set custom memory management specifically for the match
|
Set custom memory management specifically for the match
|
||||||
|
@ -816,14 +820,49 @@ limit, a match must occur in the first line and also within the offset limit.
|
||||||
In other words, whichever limit comes first is used.
|
In other words, whichever limit comes first is used.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
|
.B " uint32_t \fIvalue\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
|
||||||
|
amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
|
||||||
|
information when running an interpretive match. This limit does not apply to
|
||||||
|
matching with the JIT optimization, which has its own memory control
|
||||||
|
arrangements (see the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2jit\fP
|
||||||
|
.\"
|
||||||
|
documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
|
||||||
|
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||||
|
returned. The default limit is set when PCRE2 is built; the default default is
|
||||||
|
very large and is essentially "unlimited".
|
||||||
|
.P
|
||||||
|
A value for the heap limit may also be supplied by an item at the start of a
|
||||||
|
pattern of the form
|
||||||
|
.sp
|
||||||
|
(*LIMIT_HEAP=ddd)
|
||||||
|
.sp
|
||||||
|
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||||
|
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
|
||||||
|
limit is set, less than the default.
|
||||||
|
.P
|
||||||
|
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||||
|
stack for recording backtracking points. The more nested backtracking points
|
||||||
|
there are (that is, the deeper the search tree), the more memory is needed.
|
||||||
|
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||||
|
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||||
|
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||||
|
can be successfully processed.
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||||
.B " uint32_t \fIvalue\fP);"
|
.B " uint32_t \fIvalue\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
|
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
|
||||||
up too many resources when processing patterns that are not going to match, but
|
up too many computing resources when processing patterns that are not going to
|
||||||
which have a very large number of possibilities in their search trees. The
|
match, but which have a very large number of possibilities in their search
|
||||||
classic example is a pattern that uses nested unlimited repeats.
|
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||||
.P
|
.P
|
||||||
There is an internal counter in \fBpcre2_match()\fP that is incremented each
|
There is an internal counter in \fBpcre2_match()\fP that is incremented each
|
||||||
time round its main matching loop. If this value reaches the match limit,
|
time round its main matching loop. If this value reaches the match limit,
|
||||||
|
@ -859,15 +898,19 @@ limit is set, less than the default.
|
||||||
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
|
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
|
||||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||||
to remember the state of matching at that point. Thus, this parameter
|
to remember the state of matching at that point. Thus, this parameter
|
||||||
indirectly limits the amount of memory that is used in a match.
|
indirectly limits the amount of memory that is used in a match. However,
|
||||||
|
because the size of each memory "frame" depends on the number of capturing
|
||||||
|
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||||
|
was more useful in versions before 10.30, where function recursion was used for
|
||||||
|
backtracking.
|
||||||
.P
|
.P
|
||||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
The depth limit is not relevant, and is ignored, when matching is done using
|
||||||
compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which uses
|
JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
|
||||||
it to limit the depth of internal recursive function calls that implement
|
uses it to limit the depth of internal recursive function calls that implement
|
||||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||||
limit on the amount of system stack that is used. A recursive pattern such as
|
therefore, an indirect limit on the amount of system stack that is used. A
|
||||||
/(.)(?1)/, when matched to a very long string using \fBpcre2_dfa_match()\fP,
|
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||||
can use a great deal of stack.
|
\fBpcre2_dfa_match()\fP, can use a great deal of stack.
|
||||||
.P
|
.P
|
||||||
The default value for the depth limit can be set when PCRE2 is built; the
|
The default value for the depth limit can be set when PCRE2 is built; the
|
||||||
default default is the same value as the default for the match limit. If the
|
default default is the same value as the default for the match limit. If the
|
||||||
|
@ -921,6 +964,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
||||||
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
|
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
|
||||||
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
|
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
|
||||||
\fBpcre2_set_depth_limit()\fP above.
|
\fBpcre2_set_depth_limit()\fP above.
|
||||||
|
.sp
|
||||||
|
PCRE2_CONFIG_HEAPLIMIT
|
||||||
|
.sp
|
||||||
|
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||||
|
for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
|
||||||
|
given with \fBpcre2_set_heap_limit()\fP above.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_CONFIG_JIT
|
PCRE2_CONFIG_JIT
|
||||||
.sp
|
.sp
|
||||||
|
@ -1784,6 +1833,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
||||||
otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An
|
otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An
|
||||||
explicit match is either a literal CR or LF character, or \er or \en or one of
|
explicit match is either a literal CR or LF character, or \er or \en or one of
|
||||||
the equivalent hexadecimal or octal escape sequences.
|
the equivalent hexadecimal or octal escape sequences.
|
||||||
|
.sp
|
||||||
|
PCRE2_INFO_HEAPLIMIT
|
||||||
|
.sp
|
||||||
|
If the pattern set a heap memory limit by including an item of the form
|
||||||
|
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||||
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
|
call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_INFO_JCHANGED
|
PCRE2_INFO_JCHANGED
|
||||||
.sp
|
.sp
|
||||||
|
@ -2603,7 +2659,8 @@ The backtracking match limit was reached.
|
||||||
.sp
|
.sp
|
||||||
If a pattern contains many nested backtracking points, heap memory is used to
|
If a pattern contains many nested backtracking points, heap memory is used to
|
||||||
remember them. This error is given when the memory allocation function (default
|
remember them. This error is given when the memory allocation function (default
|
||||||
or custom) fails.
|
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||||
|
if the amount of memory needed exceeds the heap limit.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_ERROR_NULL
|
PCRE2_ERROR_NULL
|
||||||
.sp
|
.sp
|
||||||
|
@ -3322,6 +3379,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 04 April 2017
|
Last updated: 11 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2BUILD 3 "31 March 2017" "PCRE2 10.30"
|
.TH PCRE2BUILD 3 "10 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.
|
.
|
||||||
|
@ -260,17 +260,42 @@ to the \fBconfigure\fP command. This setting has no effect on the
|
||||||
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
|
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
|
||||||
(though the counting is done differently).
|
(though the counting is done differently).
|
||||||
.P
|
.P
|
||||||
In some environments it is desirable to limit the depth of nested backtracking
|
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||||
in order to restrict the maximum amount of heap memory that is used. A second
|
stack to record backtracking points. The more nested backtracking points there
|
||||||
limit controls this; it defaults to the value that is set for
|
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||||
|
which is specified in kilobytes. The limit can be changed at run time, as
|
||||||
|
described in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
documentation. The default limit (in effect unlimited) is 20 million kilobytes. You can
|
||||||
|
change this by a setting such as
|
||||||
|
.sp
|
||||||
|
--with-heap-limit=500
|
||||||
|
.sp
|
||||||
|
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||||
|
interpretive matching in \fBpcre2_match()\fP. It does not apply when JIT (which has
|
||||||
|
its own memory arrangements) is used, nor does it apply to
|
||||||
|
\fBpcre2_dfa_match()\fP.
|
||||||
|
.P
|
||||||
|
You can also explicitly limit the depth of nested backtracking in the
|
||||||
|
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
|
||||||
|
for --with-match-limit. You can set a lower default limit by adding, for
|
||||||
|
example,
|
||||||
.sp
|
.sp
|
||||||
--with-match-limit-depth=10000
|
--with-match-limit-depth=10000
|
||||||
.sp
|
.sp
|
||||||
to the \fBconfigure\fP command. This value can also be overridden at run time.
|
to the \fBconfigure\fP command. This value can be overridden at run time. This
|
||||||
As well as applying to \fBpcre2_match()\fP, this limit also controls the depth
|
depth limit indirectly limits the amount of heap memory that is used, but
|
||||||
of recursive function calls in \fBpcre2_dfa_match()\fP. These are used for
|
because the size of each backtracking "frame" depends on the number of
|
||||||
lookaround assertions, atomic groups, and recursion within patterns.
|
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||||
|
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||||
|
versions before 10.30, where function recursion was used for backtracking.
|
||||||
|
However, as well as applying to \fBpcre2_match()\fP, this limit also controls
|
||||||
|
the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are
|
||||||
|
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||||
|
The limit does not apply to JIT matching.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
||||||
|
@ -547,6 +572,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 31 March 2017
|
Last updated: 10 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
|
.TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -347,6 +347,9 @@ file names are shown when multiple files are searched. For matching lines, the
|
||||||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
file name is followed by a colon; for context lines, a hyphen separator is used.
|
||||||
If a line number is also being output, it follows the file name.
|
If a line number is also being output, it follows the file name.
|
||||||
.TP
|
.TP
|
||||||
|
\fB--heap-limit\fP=\fInumber\fP
|
||||||
|
See \fB--match-limit\fP below.
|
||||||
|
.TP
|
||||||
\fB--help\fP
|
\fB--help\fP
|
||||||
Output a help message, giving brief details of the command options and file
|
Output a help message, giving brief details of the command options and file
|
||||||
type support, and then exit. Anything else on the command line is
|
type support, and then exit. Anything else on the command line is
|
||||||
|
@ -436,7 +439,7 @@ used. There is no short form for this option.
|
||||||
\fB--match-limit\fP=\fInumber\fP
|
\fB--match-limit\fP=\fInumber\fP
|
||||||
Processing some regular expression patterns may take a very long time to search
|
Processing some regular expression patterns may take a very long time to search
|
||||||
for all possible matching strings. Others may require a very large amount of
|
for all possible matching strings. Others may require a very large amount of
|
||||||
memory. There are two options that set resource limits for matching.
|
memory. There are three options that set resource limits for matching.
|
||||||
.sp
|
.sp
|
||||||
The \fB--match-limit\fP option provides a means of limiting computing resource
|
The \fB--match-limit\fP option provides a means of limiting computing resource
|
||||||
usage when processing patterns that are not going to match, but which have a
|
usage when processing patterns that are not going to match, but which have a
|
||||||
|
@ -445,12 +448,22 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
|
||||||
counter that is incremented each time around its main processing loop. If the
|
counter that is incremented each time around its main processing loop. If the
|
||||||
value set by \fB--match-limit\fP is reached, an error occurs.
|
value set by \fB--match-limit\fP is reached, an error occurs.
|
||||||
.sp
|
.sp
|
||||||
|
The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
|
||||||
|
of heap memory that may be used for matching. Heap memory is needed only if
|
||||||
|
matching the pattern requires a significant number of nested backtracking
|
||||||
|
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||||
|
heap memory altogether.
|
||||||
|
.sp
|
||||||
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
|
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
|
||||||
which in turn limits the amount of memory that is used. This limit is of use
|
which indirectly limits the amount of memory that is used. The amount of memory
|
||||||
only if it is set smaller than \fB--match-limit\fP.
|
needed for each backtracking point depends on the number of capturing
|
||||||
|
parentheses in the pattern, so the amount of memory that is used before this
|
||||||
|
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||||
|
set smaller than \fB--match-limit\fP.
|
||||||
.sp
|
.sp
|
||||||
There are no short forms for these options. The default settings are specified
|
There are no short forms for these options. The default settings are specified
|
||||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
when the PCRE2 library is compiled, with the default defaults being very large
|
||||||
|
and so effectively unlimited.
|
||||||
.TP
|
.TP
|
||||||
\fB--max-buffer-size=\fInumber\fP
|
\fB--max-buffer-size=\fInumber\fP
|
||||||
This limits the expansion of the processing buffer, whose initial size can be
|
This limits the expansion of the processing buffer, whose initial size can be
|
||||||
|
@ -670,11 +683,12 @@ Many of the short and long forms of \fBpcre2grep\fP's options are the same
|
||||||
as in the GNU \fBgrep\fP program. Any long option of the form
|
as in the GNU \fBgrep\fP program. Any long option of the form
|
||||||
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
|
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
|
||||||
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
||||||
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
|
||||||
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
|
||||||
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
|
\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
|
||||||
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
|
\fB--output\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
|
||||||
\fB--only-matching\fP option with a capturing parentheses number.
|
\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
|
||||||
|
capturing parentheses number.
|
||||||
.P
|
.P
|
||||||
Although most of the common options work the same way, a few are different in
|
Although most of the common options work the same way, a few are different in
|
||||||
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
||||||
|
@ -799,9 +813,9 @@ message and the line that caused the problem to the standard error stream. If
|
||||||
there are more than 20 such errors, \fBpcre2grep\fP gives up.
|
there are more than 20 such errors, \fBpcre2grep\fP gives up.
|
||||||
.P
|
.P
|
||||||
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
|
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
|
||||||
overall resource limit; there is a second option called \fB--depth-limit\fP
|
overall resource limit. There are also other limits that affect the amount of
|
||||||
that sets a limit on the amount of memory that is used (see the discussion of
|
memory used during matching; see the discussion of \fB--heap-limit\fP and
|
||||||
these options above).
|
\fB--depth-limit\fP above.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SH DIAGNOSTICS
|
.SH DIAGNOSTICS
|
||||||
|
@ -834,6 +848,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 06 April 2017
|
Last updated: 11 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -383,6 +383,9 @@ OPTIONS
|
||||||
colon; for context lines, a hyphen separator is used. If a
|
colon; for context lines, a hyphen separator is used. If a
|
||||||
line number is also being output, it follows the file name.
|
line number is also being output, it follows the file name.
|
||||||
|
|
||||||
|
--heap-limit=number
|
||||||
|
See --match-limit below.
|
||||||
|
|
||||||
--help Output a help message, giving brief details of the command
|
--help Output a help message, giving brief details of the command
|
||||||
options and file type support, and then exit. Anything else
|
options and file type support, and then exit. Anything else
|
||||||
on the command line is ignored.
|
on the command line is ignored.
|
||||||
|
@ -482,7 +485,7 @@ OPTIONS
|
||||||
--match-limit=number
|
--match-limit=number
|
||||||
Processing some regular expression patterns may take a very
|
Processing some regular expression patterns may take a very
|
||||||
long time to search for all possible matching strings. Others
|
long time to search for all possible matching strings. Others
|
||||||
may require a very large amount of memory. There are two
|
may require a very large amount of memory. There are three
|
||||||
options that set resource limits for matching.
|
options that set resource limits for matching.
|
||||||
|
|
||||||
The --match-limit option provides a means of limiting comput-
|
The --match-limit option provides a means of limiting comput-
|
||||||
|
@ -494,237 +497,248 @@ OPTIONS
|
||||||
processing loop. If the value set by --match-limit is
|
processing loop. If the value set by --match-limit is
|
||||||
reached, an error occurs.
|
reached, an error occurs.
|
||||||
|
|
||||||
The --depth-limit option limits the depth of nested back-
|
The --heap-limit option specifies, as a number of kilobytes,
|
||||||
tracking points, which in turn limits the amount of memory
|
the amount of heap memory that may be used for matching. Heap
|
||||||
that is used. This limit is of use only if it is set smaller
|
memory is needed only if matching the pattern requires a sig-
|
||||||
than --match-limit.
|
nificant number of nested backtracking points to be remem-
|
||||||
|
bered. This parameter can be set to zero to forbid the use of
|
||||||
|
heap memory altogether.
|
||||||
|
|
||||||
|
The --depth-limit option limits the depth of nested back-
|
||||||
|
tracking points, which indirectly limits the amount of memory
|
||||||
|
that is used. The amount of memory needed for each backtrack-
|
||||||
|
ing point depends on the number of capturing parentheses in
|
||||||
|
the pattern, so the amount of memory that is used before this
|
||||||
|
limit acts varies from pattern to pattern. This limit is of
|
||||||
|
use only if it is set smaller than --match-limit.
|
||||||
|
|
||||||
There are no short forms for these options. The default set-
|
There are no short forms for these options. The default set-
|
||||||
tings are specified when the PCRE2 library is compiled, with
|
tings are specified when the PCRE2 library is compiled, with
|
||||||
the default default being 10 million.
|
the default defaults being very large and so effectively
|
||||||
|
unlimited.
|
||||||
|
|
||||||
--max-buffer-size=number
|
--max-buffer-size=number
|
||||||
This limits the expansion of the processing buffer, whose
|
This limits the expansion of the processing buffer, whose
|
||||||
initial size can be set by --buffer-size. The maximum buffer
|
initial size can be set by --buffer-size. The maximum buffer
|
||||||
size is silently forced to be no smaller than the starting
|
size is silently forced to be no smaller than the starting
|
||||||
buffer size.
|
buffer size.
|
||||||
|
|
||||||
-M, --multiline
|
-M, --multiline
|
||||||
Allow patterns to match more than one line. When this option
|
Allow patterns to match more than one line. When this option
|
||||||
is set, the PCRE2 library is called in "multiline" mode. This
|
is set, the PCRE2 library is called in "multiline" mode. This
|
||||||
allows a matched string to extend past the end of a line and
|
allows a matched string to extend past the end of a line and
|
||||||
continue on one or more subsequent lines. Patterns used with
|
continue on one or more subsequent lines. Patterns used with
|
||||||
-M may usefully contain literal newline characters and inter-
|
-M may usefully contain literal newline characters and inter-
|
||||||
nal occurrences of ^ and $ characters. The output for a suc-
|
nal occurrences of ^ and $ characters. The output for a suc-
|
||||||
cessful match may consist of more than one line. The first
|
cessful match may consist of more than one line. The first
|
||||||
line is the line in which the match started, and the last
|
line is the line in which the match started, and the last
|
||||||
line is the line in which the match ended. If the matched
|
line is the line in which the match ended. If the matched
|
||||||
string ends with a newline sequence, the output ends at the
|
string ends with a newline sequence, the output ends at the
|
||||||
end of that line. If -v is set, none of the lines in a
|
end of that line. If -v is set, none of the lines in a
|
||||||
multi-line match are output. Once a match has been handled,
|
multi-line match are output. Once a match has been handled,
|
||||||
scanning restarts at the beginning of the line after the one
|
scanning restarts at the beginning of the line after the one
|
||||||
in which the match ended.
|
in which the match ended.
|
||||||
|
|
||||||
The newline sequence that separates multiple lines must be
|
The newline sequence that separates multiple lines must be
|
||||||
matched as part of the pattern. For example, to find the
|
matched as part of the pattern. For example, to find the
|
||||||
phrase "regular expression" in a file where "regular" might
|
phrase "regular expression" in a file where "regular" might
|
||||||
be at the end of a line and "expression" at the start of the
|
be at the end of a line and "expression" at the start of the
|
||||||
next line, you could use this command:
|
next line, you could use this command:
|
||||||
|
|
||||||
pcre2grep -M 'regular\s+expression' <file>
|
pcre2grep -M 'regular\s+expression' <file>
|
||||||
|
|
||||||
The \s escape sequence matches any white space character,
|
The \s escape sequence matches any white space character,
|
||||||
including newlines, and is followed by + so as to match
|
including newlines, and is followed by + so as to match
|
||||||
trailing white space on the first line as well as possibly
|
trailing white space on the first line as well as possibly
|
||||||
handling a two-character newline sequence.
|
handling a two-character newline sequence.
|
||||||
|
|
||||||
There is a limit to the number of lines that can be matched,
|
There is a limit to the number of lines that can be matched,
|
||||||
imposed by the way that pcre2grep buffers the input file as
|
imposed by the way that pcre2grep buffers the input file as
|
||||||
it scans it. With a sufficiently large processing buffer,
|
it scans it. With a sufficiently large processing buffer,
|
||||||
this should not be a problem, but the -M option does not work
|
this should not be a problem, but the -M option does not work
|
||||||
when input is read line by line (see --line-buffered.)
|
when input is read line by line (see --line-buffered.)
|
||||||
|
|
||||||
-N newline-type, --newline=newline-type
|
-N newline-type, --newline=newline-type
|
||||||
The PCRE2 library supports five different conventions for
|
The PCRE2 library supports five different conventions for
|
||||||
indicating the ends of lines. They are the single-character
|
indicating the ends of lines. They are the single-character
|
||||||
sequences CR (carriage return) and LF (linefeed), the two-
|
sequences CR (carriage return) and LF (linefeed), the two-
|
||||||
character sequence CRLF, an "anycrlf" convention, which rec-
|
character sequence CRLF, an "anycrlf" convention, which rec-
|
||||||
ognizes any of the preceding three types, and an "any" con-
|
ognizes any of the preceding three types, and an "any" con-
|
||||||
vention, in which any Unicode line ending sequence is assumed
|
vention, in which any Unicode line ending sequence is assumed
|
||||||
to end a line. The Unicode sequences are the three just men-
|
to end a line. The Unicode sequences are the three just men-
|
||||||
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||||||
U+000C), NEL (next line, U+0085), LS (line separator,
|
U+000C), NEL (next line, U+0085), LS (line separator,
|
||||||
U+2028), and PS (paragraph separator, U+2029).
|
U+2028), and PS (paragraph separator, U+2029).
|
||||||
|
|
||||||
When the PCRE2 library is built, a default line-ending
|
When the PCRE2 library is built, a default line-ending
|
||||||
sequence is specified. This is normally the standard
|
sequence is specified. This is normally the standard
|
||||||
sequence for the operating system. Unless otherwise specified
|
sequence for the operating system. Unless otherwise specified
|
||||||
by this option, pcre2grep uses the library's default. The
|
by this option, pcre2grep uses the library's default. The
|
||||||
possible values for this option are CR, LF, CRLF, ANYCRLF, or
|
possible values for this option are CR, LF, CRLF, ANYCRLF, or
|
||||||
ANY. This makes it possible to use pcre2grep to scan files
|
ANY. This makes it possible to use pcre2grep to scan files
|
||||||
that have come from other environments without having to mod-
|
that have come from other environments without having to mod-
|
||||||
ify their line endings. If the data that is being scanned
|
ify their line endings. If the data that is being scanned
|
||||||
does not agree with the convention set by this option,
|
does not agree with the convention set by this option,
|
||||||
pcre2grep may behave in strange ways. Note that this option
|
pcre2grep may behave in strange ways. Note that this option
|
||||||
does not apply to files specified by the -f, --exclude-from,
|
does not apply to files specified by the -f, --exclude-from,
|
||||||
or --include-from options, which are expected to use the
|
or --include-from options, which are expected to use the
|
||||||
operating system's standard newline sequence.
|
operating system's standard newline sequence.
|
||||||
|
|
||||||
-n, --line-number
|
-n, --line-number
|
||||||
Precede each output line by its line number in the file, fol-
|
Precede each output line by its line number in the file, fol-
|
||||||
lowed by a colon for matching lines or a hyphen for context
|
lowed by a colon for matching lines or a hyphen for context
|
||||||
lines. If the file name is also being output, it precedes the
|
lines. If the file name is also being output, it precedes the
|
||||||
line number. When the -M option causes a pattern to match
|
line number. When the -M option causes a pattern to match
|
||||||
more than one line, only the first is preceded by its line
|
more than one line, only the first is preceded by its line
|
||||||
number. This option is forced if --line-offsets is used.
|
number. This option is forced if --line-offsets is used.
|
||||||
|
|
||||||
--no-jit If the PCRE2 library is built with support for just-in-time
|
--no-jit If the PCRE2 library is built with support for just-in-time
|
||||||
compiling (which speeds up matching), pcre2grep automatically
|
compiling (which speeds up matching), pcre2grep automatically
|
||||||
makes use of this, unless it was explicitly disabled at build
|
makes use of this, unless it was explicitly disabled at build
|
||||||
time. This option can be used to disable the use of JIT at
|
time. This option can be used to disable the use of JIT at
|
||||||
run time. It is provided for testing and working round prob-
|
run time. It is provided for testing and working round prob-
|
||||||
lems. It should never be needed in normal use.
|
lems. It should never be needed in normal use.
|
||||||
|
|
||||||
-O text, --output=text
|
-O text, --output=text
|
||||||
When there is a match, instead of outputting the whole line
|
When there is a match, instead of outputting the whole line
|
||||||
that matched, output just the given text. This option is
|
that matched, output just the given text. This option is
|
||||||
mutually exclusive with --only-matching, --file-offsets, and
|
mutually exclusive with --only-matching, --file-offsets, and
|
||||||
--line-offsets. Escape sequences starting with a dollar char-
|
--line-offsets. Escape sequences starting with a dollar char-
|
||||||
acter may be used to insert the contents of the matched part
|
acter may be used to insert the contents of the matched part
|
||||||
of the line and/or captured substrings into the text.
|
of the line and/or captured substrings into the text.
|
||||||
|
|
||||||
$<digits> or ${<digits>} is replaced by the captured sub-
|
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||||
string of the given decimal number; zero substitutes the
|
string of the given decimal number; zero substitutes the
|
||||||
whole match. If the number is greater than the number of cap-
|
whole match. If the number is greater than the number of cap-
|
||||||
turing substrings, or if the capture is unset, the replace-
|
turing substrings, or if the capture is unset, the replace-
|
||||||
ment is empty.
|
ment is empty.
|
||||||
|
|
||||||
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||||
form feed; $n by newline; $r by carriage return; $t by tab;
|
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||||
$v by vertical tab.
|
$v by vertical tab.
|
||||||
|
|
||||||
$o<digits> is replaced by the character represented by the
|
$o<digits> is replaced by the character represented by the
|
||||||
given octal number; up to three digits are processed.
|
given octal number; up to three digits are processed.
|
||||||
|
|
||||||
$x<digits> is replaced by the character represented by the
|
$x<digits> is replaced by the character represented by the
|
||||||
given hexadecimal number; up to two digits are processed.
|
given hexadecimal number; up to two digits are processed.
|
||||||
|
|
||||||
Any other character is substituted by itself. In particular,
|
Any other character is substituted by itself. In particular,
|
||||||
$$ is replaced by a single dollar.
|
$$ is replaced by a single dollar.
|
||||||
|
|
||||||
-o, --only-matching
|
-o, --only-matching
|
||||||
Show only the part of the line that matched a pattern instead
|
Show only the part of the line that matched a pattern instead
|
||||||
of the whole line. In this mode, no context is shown. That
|
of the whole line. In this mode, no context is shown. That
|
||||||
is, the -A, -B, and -C options are ignored. If there is more
|
is, the -A, -B, and -C options are ignored. If there is more
|
||||||
than one match in a line, each of them is shown separately,
|
than one match in a line, each of them is shown separately,
|
||||||
on a separate line of output. If -o is combined with -v
|
on a separate line of output. If -o is combined with -v
|
||||||
(invert the sense of the match to find non-matching lines),
|
(invert the sense of the match to find non-matching lines),
|
||||||
no output is generated, but the return code is set appropri-
|
no output is generated, but the return code is set appropri-
|
||||||
ately. If the matched portion of the line is empty, nothing
|
ately. If the matched portion of the line is empty, nothing
|
||||||
is output unless the file name or line number are being
|
is output unless the file name or line number are being
|
||||||
printed, in which case they are shown on an otherwise empty
|
printed, in which case they are shown on an otherwise empty
|
||||||
line. This option is mutually exclusive with --output,
|
line. This option is mutually exclusive with --output,
|
||||||
--file-offsets and --line-offsets.
|
--file-offsets and --line-offsets.
|
||||||
|
|
||||||
-onumber, --only-matching=number
|
-onumber, --only-matching=number
|
||||||
Show only the part of the line that matched the capturing
|
Show only the part of the line that matched the capturing
|
||||||
parentheses of the given number. Up to 32 capturing parenthe-
|
parentheses of the given number. Up to 32 capturing parenthe-
|
||||||
ses are supported, and -o0 is equivalent to -o without a num-
|
ses are supported, and -o0 is equivalent to -o without a num-
|
||||||
ber. Because these options can be given without an argument
|
ber. Because these options can be given without an argument
|
||||||
(see above), if an argument is present, it must be given in
|
(see above), if an argument is present, it must be given in
|
||||||
the same shell item, for example, -o3 or --only-matching=2.
|
the same shell item, for example, -o3 or --only-matching=2.
|
||||||
The comments given for the non-argument case above also apply
|
The comments given for the non-argument case above also apply
|
||||||
to this option. If the specified capturing parentheses do not
|
to this option. If the specified capturing parentheses do not
|
||||||
exist in the pattern, or were not set in the match, nothing
|
exist in the pattern, or were not set in the match, nothing
|
||||||
is output unless the file name or line number are being out-
|
is output unless the file name or line number are being out-
|
||||||
put.
|
put.
|
||||||
|
|
||||||
If this option is given multiple times, multiple substrings
|
If this option is given multiple times, multiple substrings
|
||||||
are output for each match, in the order the options are
|
are output for each match, in the order the options are
|
||||||
given, and all on one line. For example, -o3 -o1 -o3 causes
|
given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||||
the substrings matched by capturing parentheses 3 and 1 and
|
the substrings matched by capturing parentheses 3 and 1 and
|
||||||
then 3 again to be output. By default, there is no separator
|
then 3 again to be output. By default, there is no separator
|
||||||
(but see the next option).
|
(but see the next option).
|
||||||
|
|
||||||
--om-separator=text
|
--om-separator=text
|
||||||
Specify a separating string for multiple occurrences of -o.
|
Specify a separating string for multiple occurrences of -o.
|
||||||
The default is an empty string. Separating strings are never
|
The default is an empty string. Separating strings are never
|
||||||
coloured.
|
coloured.
|
||||||
|
|
||||||
-q, --quiet
|
-q, --quiet
|
||||||
Work quietly, that is, display nothing except error messages.
|
Work quietly, that is, display nothing except error messages.
|
||||||
The exit status indicates whether or not any matches were
|
The exit status indicates whether or not any matches were
|
||||||
found.
|
found.
|
||||||
|
|
||||||
-r, --recursive
|
-r, --recursive
|
||||||
If any given path is a directory, recursively scan the files
|
If any given path is a directory, recursively scan the files
|
||||||
it contains, taking note of any --include and --exclude set-
|
it contains, taking note of any --include and --exclude set-
|
||||||
tings. By default, a directory is read as a normal file; in
|
tings. By default, a directory is read as a normal file; in
|
||||||
some operating systems this gives an immediate end-of-file.
|
some operating systems this gives an immediate end-of-file.
|
||||||
This option is a shorthand for setting the -d option to
|
This option is a shorthand for setting the -d option to
|
||||||
"recurse".
|
"recurse".
|
||||||
|
|
||||||
--recursion-limit=number
|
--recursion-limit=number
|
||||||
See --match-limit above.
|
See --match-limit above.
|
||||||
|
|
||||||
-s, --no-messages
|
-s, --no-messages
|
||||||
Suppress error messages about non-existent or unreadable
|
Suppress error messages about non-existent or unreadable
|
||||||
files. Such files are quietly skipped. However, the return
|
files. Such files are quietly skipped. However, the return
|
||||||
code is still 2, even if matches were found in other files.
|
code is still 2, even if matches were found in other files.
|
||||||
|
|
||||||
-t, --total-count
|
-t, --total-count
|
||||||
This option is useful when scanning more than one file. If
|
This option is useful when scanning more than one file. If
|
||||||
used on its own, -t suppresses all output except for a grand
|
used on its own, -t suppresses all output except for a grand
|
||||||
total number of matching lines (or non-matching lines if -v
|
total number of matching lines (or non-matching lines if -v
|
||||||
is used) in all the files. If -t is used with -c, a grand
|
is used) in all the files. If -t is used with -c, a grand
|
||||||
total is output except when the previous output is just one
|
total is output except when the previous output is just one
|
||||||
line. In other words, it is not output when just one file's
|
line. In other words, it is not output when just one file's
|
||||||
count is listed. If file names are being output, the grand
|
count is listed. If file names are being output, the grand
|
||||||
total is preceded by "TOTAL:". Otherwise, it appears as just
|
total is preceded by "TOTAL:". Otherwise, it appears as just
|
||||||
another number. The -t option is ignored when used with -L
|
another number. The -t option is ignored when used with -L
|
||||||
(list files without matches), because the grand total would
|
(list files without matches), because the grand total would
|
||||||
always be zero.
|
always be zero.
|
||||||
|
|
||||||
-u, --utf-8
|
-u, --utf-8
|
||||||
Operate in UTF-8 mode. This option is available only if PCRE2
|
Operate in UTF-8 mode. This option is available only if PCRE2
|
||||||
has been compiled with UTF-8 support. All patterns (including
|
has been compiled with UTF-8 support. All patterns (including
|
||||||
those for any --exclude and --include options) and all sub-
|
those for any --exclude and --include options) and all sub-
|
||||||
ject lines that are scanned must be valid strings of UTF-8
|
ject lines that are scanned must be valid strings of UTF-8
|
||||||
characters.
|
characters.
|
||||||
|
|
||||||
-V, --version
|
-V, --version
|
||||||
Write the version numbers of pcre2grep and the PCRE2 library
|
Write the version numbers of pcre2grep and the PCRE2 library
|
||||||
to the standard output and then exit. Anything else on the
|
to the standard output and then exit. Anything else on the
|
||||||
command line is ignored.
|
command line is ignored.
|
||||||
|
|
||||||
-v, --invert-match
|
-v, --invert-match
|
||||||
Invert the sense of the match, so that lines which do not
|
Invert the sense of the match, so that lines which do not
|
||||||
match any of the patterns are the ones that are found.
|
match any of the patterns are the ones that are found.
|
||||||
|
|
||||||
-w, --word-regex, --word-regexp
|
-w, --word-regex, --word-regexp
|
||||||
Force the patterns to match only whole words. This is equiva-
|
Force the patterns to match only whole words. This is equiva-
|
||||||
lent to having \b at the start and end of the pattern. This
|
lent to having \b at the start and end of the pattern. This
|
||||||
option applies only to the patterns that are matched against
|
option applies only to the patterns that are matched against
|
||||||
the contents of files; it does not apply to patterns speci-
|
the contents of files; it does not apply to patterns speci-
|
||||||
fied by any of the --include or --exclude options.
|
fied by any of the --include or --exclude options.
|
||||||
|
|
||||||
-x, --line-regex, --line-regexp
|
-x, --line-regex, --line-regexp
|
||||||
Force the patterns to be anchored (each must start matching
|
Force the patterns to be anchored (each must start matching
|
||||||
at the beginning of a line) and in addition, require them to
|
at the beginning of a line) and in addition, require them to
|
||||||
match entire lines. In multiline mode the match may be more
|
match entire lines. In multiline mode the match may be more
|
||||||
than one line. This is equivalent to having \A and \Z charac-
|
than one line. This is equivalent to having \A and \Z charac-
|
||||||
ters at the start and end of each alternative top-level
|
ters at the start and end of each alternative top-level
|
||||||
branch in every pattern. This option applies only to the pat-
|
branch in every pattern. This option applies only to the pat-
|
||||||
terns that are matched against the contents of files; it does
|
terns that are matched against the contents of files; it does
|
||||||
not apply to patterns specified by any of the --include or
|
not apply to patterns specified by any of the --include or
|
||||||
--exclude options.
|
--exclude options.
|
||||||
|
|
||||||
|
|
||||||
ENVIRONMENT VARIABLES
|
ENVIRONMENT VARIABLES
|
||||||
|
|
||||||
The environment variables LC_ALL and LC_CTYPE are examined, in that
|
The environment variables LC_ALL and LC_CTYPE are examined, in that
|
||||||
order, for a locale. The first one that is set is used. This can be
|
order, for a locale. The first one that is set is used. This can be
|
||||||
overridden by the --locale option. If no locale is set, the PCRE2
|
overridden by the --locale option. If no locale is set, the PCRE2
|
||||||
library's default (usually the "C" locale) is used.
|
library's default (usually the "C" locale) is used.
|
||||||
|
|
||||||
|
|
||||||
|
@ -732,99 +746,99 @@ NEWLINES
|
||||||
|
|
||||||
The -N (--newline) option allows pcre2grep to scan files with different
|
The -N (--newline) option allows pcre2grep to scan files with different
|
||||||
newline conventions from the default. Any parts of the input files that
|
newline conventions from the default. Any parts of the input files that
|
||||||
are written to the standard output are copied identically, with what-
|
are written to the standard output are copied identically, with what-
|
||||||
ever newline sequences they have in the input. However, the setting of
|
ever newline sequences they have in the input. However, the setting of
|
||||||
this option does not affect the interpretation of files specified by
|
this option does not affect the interpretation of files specified by
|
||||||
the -f, --exclude-from, or --include-from options, which are assumed to
|
the -f, --exclude-from, or --include-from options, which are assumed to
|
||||||
use the operating system's standard newline sequence, nor does it
|
use the operating system's standard newline sequence, nor does it
|
||||||
affect the way in which pcre2grep writes informational messages to the
|
affect the way in which pcre2grep writes informational messages to the
|
||||||
standard error and output streams. For these it uses the string "\n" to
|
standard error and output streams. For these it uses the string "\n" to
|
||||||
indicate newlines, relying on the C I/O library to convert this to an
|
indicate newlines, relying on the C I/O library to convert this to an
|
||||||
appropriate sequence.
|
appropriate sequence.
|
||||||
|
|
||||||
|
|
||||||
OPTIONS COMPATIBILITY
|
OPTIONS COMPATIBILITY
|
||||||
|
|
||||||
Many of the short and long forms of pcre2grep's options are the same as
|
Many of the short and long forms of pcre2grep's options are the same as
|
||||||
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
|
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
|
||||||
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
||||||
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
ever, the --depth-limit, --file-list, --file-offsets, --heap-limit,
|
||||||
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
--include-dir, --line-offsets, --locale, --match-limit, -M, --multi-
|
||||||
line, --om-separator, --output, -u, and --utf-8 options are specific to
|
line, -N, --newline, --om-separator, --output, -u, and --utf-8 options
|
||||||
pcre2grep, as is the use of the --only-matching option with a capturing
|
are specific to pcre2grep, as is the use of the --only-matching option
|
||||||
parentheses number.
|
with a capturing parentheses number.
|
||||||
|
|
||||||
Although most of the common options work the same way, a few are dif-
|
Although most of the common options work the same way, a few are dif-
|
||||||
ferent in pcre2grep. For example, the --include option's argument is a
|
ferent in pcre2grep. For example, the --include option's argument is a
|
||||||
glob for GNU grep, but a regular expression for pcre2grep. If both the
|
glob for GNU grep, but a regular expression for pcre2grep. If both the
|
||||||
-c and -l options are given, GNU grep lists only file names, without
|
-c and -l options are given, GNU grep lists only file names, without
|
||||||
counts, but pcre2grep gives the counts as well.
|
counts, but pcre2grep gives the counts as well.
|
||||||
|
|
||||||
|
|
||||||
OPTIONS WITH DATA
|
OPTIONS WITH DATA
|
||||||
|
|
||||||
There are four different ways in which an option with data can be spec-
|
There are four different ways in which an option with data can be spec-
|
||||||
ified. If a short form option is used, the data may follow immedi-
|
ified. If a short form option is used, the data may follow immedi-
|
||||||
ately, or (with one exception) in the next command line item. For exam-
|
ately, or (with one exception) in the next command line item. For exam-
|
||||||
ple:
|
ple:
|
||||||
|
|
||||||
-f/some/file
|
-f/some/file
|
||||||
-f /some/file
|
-f /some/file
|
||||||
|
|
||||||
The exception is the -o option, which may appear with or without data.
|
The exception is the -o option, which may appear with or without data.
|
||||||
Because of this, if data is present, it must follow immediately in the
|
Because of this, if data is present, it must follow immediately in the
|
||||||
same item, for example -o3.
|
same item, for example -o3.
|
||||||
|
|
||||||
If a long form option is used, the data may appear in the same command
|
If a long form option is used, the data may appear in the same command
|
||||||
line item, separated by an equals character, or (with two exceptions)
|
line item, separated by an equals character, or (with two exceptions)
|
||||||
it may appear in the next command line item. For example:
|
it may appear in the next command line item. For example:
|
||||||
|
|
||||||
--file=/some/file
|
--file=/some/file
|
||||||
--file /some/file
|
--file /some/file
|
||||||
|
|
||||||
Note, however, that if you want to supply a file name beginning with ~
|
Note, however, that if you want to supply a file name beginning with ~
|
||||||
as data in a shell command, and have the shell expand ~ to a home
|
as data in a shell command, and have the shell expand ~ to a home
|
||||||
directory, you must separate the file name from the option, because the
|
directory, you must separate the file name from the option, because the
|
||||||
shell does not treat ~ specially unless it is at the start of an item.
|
shell does not treat ~ specially unless it is at the start of an item.
|
||||||
|
|
||||||
The exceptions to the above are the --colour (or --color) and --only-
|
The exceptions to the above are the --colour (or --color) and --only-
|
||||||
matching options, for which the data is optional. If one of these
|
matching options, for which the data is optional. If one of these
|
||||||
options does have data, it must be given in the first form, using an
|
options does have data, it must be given in the first form, using an
|
||||||
equals character. Otherwise pcre2grep will assume that it has no data.
|
equals character. Otherwise pcre2grep will assume that it has no data.
|
||||||
|
|
||||||
|
|
||||||
USING PCRE2'S CALLOUT FACILITY
|
USING PCRE2'S CALLOUT FACILITY
|
||||||
|
|
||||||
pcre2grep has, by default, support for calling external programs or
|
pcre2grep has, by default, support for calling external programs or
|
||||||
scripts or echoing specific strings during matching by making use of
|
scripts or echoing specific strings during matching by making use of
|
||||||
PCRE2's callout facility. However, this support can be disabled when
|
PCRE2's callout facility. However, this support can be disabled when
|
||||||
pcre2grep is built. You can find out whether your binary has support
|
pcre2grep is built. You can find out whether your binary has support
|
||||||
for callouts by running it with the --help option. If the support is
|
for callouts by running it with the --help option. If the support is
|
||||||
not enabled, all callouts in patterns are ignored by pcre2grep.
|
not enabled, all callouts in patterns are ignored by pcre2grep.
|
||||||
|
|
||||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||||
mentation for details). Numbered callouts are ignored by pcre2grep;
|
mentation for details). Numbered callouts are ignored by pcre2grep;
|
||||||
only callouts with string arguments are useful.
|
only callouts with string arguments are useful.
|
||||||
|
|
||||||
Calling external programs or scripts
|
Calling external programs or scripts
|
||||||
|
|
||||||
If the callout string does not start with a pipe (vertical bar) charac-
|
If the callout string does not start with a pipe (vertical bar) charac-
|
||||||
ter, it is parsed into a list of substrings separated by pipe charac-
|
ter, it is parsed into a list of substrings separated by pipe charac-
|
||||||
ters. The first substring must be an executable name, with the follow-
|
ters. The first substring must be an executable name, with the follow-
|
||||||
ing substrings specifying arguments:
|
ing substrings specifying arguments:
|
||||||
|
|
||||||
executable_name|arg1|arg2|...
|
executable_name|arg1|arg2|...
|
||||||
|
|
||||||
Any substring (including the executable name) may contain escape
|
Any substring (including the executable name) may contain escape
|
||||||
sequences started by a dollar character: $<digits> or ${<digits>} is
|
sequences started by a dollar character: $<digits> or ${<digits>} is
|
||||||
replaced by the captured substring of the given decimal number, which
|
replaced by the captured substring of the given decimal number, which
|
||||||
must be greater than zero. If the number is greater than the number of
|
must be greater than zero. If the number is greater than the number of
|
||||||
capturing substrings, or if the capture is unset, the replacement is
|
capturing substrings, or if the capture is unset, the replacement is
|
||||||
empty.
|
empty.
|
||||||
|
|
||||||
Any other character is substituted by itself. In particular, $$ is
|
Any other character is substituted by itself. In particular, $$ is
|
||||||
replaced by a single dollar and $| is replaced by a pipe character.
|
replaced by a single dollar and $| is replaced by a pipe character.
|
||||||
Here is an example:
|
Here is an example:
|
||||||
|
|
||||||
echo -e "abcde\n12345" | pcre2grep \
|
echo -e "abcde\n12345" | pcre2grep \
|
||||||
|
@ -840,49 +854,49 @@ USING PCRE2'S CALLOUT FACILITY
|
||||||
|
|
||||||
The parameters for the execv() system call that is used to run the pro-
|
The parameters for the execv() system call that is used to run the pro-
|
||||||
gram or script are zero-terminated strings. This means that binary zero
|
gram or script are zero-terminated strings. This means that binary zero
|
||||||
characters in the callout argument will cause premature termination of
|
characters in the callout argument will cause premature termination of
|
||||||
their substrings, and therefore should not be present. Any syntax
|
their substrings, and therefore should not be present. Any syntax
|
||||||
errors in the string (for example, a dollar not followed by another
|
errors in the string (for example, a dollar not followed by another
|
||||||
character) cause the callout to be ignored. If running the program
|
character) cause the callout to be ignored. If running the program
|
||||||
fails for any reason (including the non-existence of the executable), a
|
fails for any reason (including the non-existence of the executable), a
|
||||||
local matching failure occurs and the matcher backtracks in the normal
|
local matching failure occurs and the matcher backtracks in the normal
|
||||||
way.
|
way.
|
||||||
|
|
||||||
Echoing a specific string
|
Echoing a specific string
|
||||||
|
|
||||||
If the callout string starts with a pipe (vertical bar) character, the
|
If the callout string starts with a pipe (vertical bar) character, the
|
||||||
rest of the string is written to the output, having been passed through
|
rest of the string is written to the output, having been passed through
|
||||||
the same escape processing as text from the --output option. This pro-
|
the same escape processing as text from the --output option. This pro-
|
||||||
vides a simple echoing facility that avoids calling an external program
|
vides a simple echoing facility that avoids calling an external program
|
||||||
or script. No terminator is added to the string, so if you want a new-
|
or script. No terminator is added to the string, so if you want a new-
|
||||||
line, you must include it explicitly. Matching continues normally
|
line, you must include it explicitly. Matching continues normally
|
||||||
after the string is output. If you want to see only the callout output
|
after the string is output. If you want to see only the callout output
|
||||||
but not any output from an actual match, you should end the relevant
|
but not any output from an actual match, you should end the relevant
|
||||||
pattern with (*FAIL).
|
pattern with (*FAIL).
|
||||||
|
|
||||||
|
|
||||||
MATCHING ERRORS
|
MATCHING ERRORS
|
||||||
|
|
||||||
It is possible to supply a regular expression that takes a very long
|
It is possible to supply a regular expression that takes a very long
|
||||||
time to fail to match certain lines. Such patterns normally involve
|
time to fail to match certain lines. Such patterns normally involve
|
||||||
nested indefinite repeats, for example: (a+)*\d when matched against a
|
nested indefinite repeats, for example: (a+)*\d when matched against a
|
||||||
line of a's with no final digit. The PCRE2 matching function has a
|
line of a's with no final digit. The PCRE2 matching function has a
|
||||||
resource limit that causes it to abort in these circumstances. If this
|
resource limit that causes it to abort in these circumstances. If this
|
||||||
happens, pcre2grep outputs an error message and the line that caused
|
happens, pcre2grep outputs an error message and the line that caused
|
||||||
the problem to the standard error stream. If there are more than 20
|
the problem to the standard error stream. If there are more than 20
|
||||||
such errors, pcre2grep gives up.
|
such errors, pcre2grep gives up.
|
||||||
|
|
||||||
The --match-limit option of pcre2grep can be used to set the overall
|
The --match-limit option of pcre2grep can be used to set the overall
|
||||||
resource limit; there is a second option called --depth-limit that sets
|
resource limit. There are also other limits that affect the amount of
|
||||||
a limit on the amount of memory that is used (see the discussion of
|
memory used during matching; see the discussion of --heap-limit and
|
||||||
these options above).
|
--depth-limit above.
|
||||||
|
|
||||||
|
|
||||||
DIAGNOSTICS
|
DIAGNOSTICS
|
||||||
|
|
||||||
Exit status is 0 if any matches were found, 1 if no matches were found,
|
Exit status is 0 if any matches were found, 1 if no matches were found,
|
||||||
and 2 for syntax errors, overlong lines, non-existent or inaccessible
|
and 2 for syntax errors, overlong lines, non-existent or inaccessible
|
||||||
files (even if matches were found in other files) or too many matching
|
files (even if matches were found in other files) or too many matching
|
||||||
errors. Using the -s option to suppress error messages about inaccessi-
|
errors. Using the -s option to suppress error messages about inaccessi-
|
||||||
ble files does not affect the return code.
|
ble files does not affect the return code.
|
||||||
|
|
||||||
|
@ -901,5 +915,5 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 06 April 2017
|
Last updated: 11 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PATTERN 3 "03 April 2017" "PCRE2 10.30"
|
.TH PCRE2PATTERN 3 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||||
|
@ -138,14 +138,15 @@ the application to apply the JIT optimization by calling
|
||||||
\fBpcre2_jit_compile()\fP is ignored.
|
\fBpcre2_jit_compile()\fP is ignored.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Setting match and backtracking depth limits"
|
.SS "Setting match resource limits"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The pcre2_match() function contains a counter that is incremented every time it
|
The pcre2_match() function contains a counter that is incremented every time it
|
||||||
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
|
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
|
||||||
this counter, which therefore limits the amount of computing resource used for
|
this counter, which therefore limits the amount of computing resource used for
|
||||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
a match. The maximum depth of nested backtracking can also be limited; this
|
||||||
restricts the amount of heap memory that is used.
|
indirectly restricts the amount of heap memory that is used, but there is also
|
||||||
|
an explicit memory limit that can be set.
|
||||||
.P
|
.P
|
||||||
These facilities are provided to catch runaway matches that are provoked by
|
These facilities are provided to catch runaway matches that are provoked by
|
||||||
patterns with huge matching trees (a typical example is a pattern with nested
|
patterns with huge matching trees (a typical example is a pattern with nested
|
||||||
|
@ -153,6 +154,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
||||||
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
|
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
|
||||||
can also be set by items at the start of the pattern of the form
|
can also be set by items at the start of the pattern of the form
|
||||||
.sp
|
.sp
|
||||||
|
(*LIMIT_HEAP=d)
|
||||||
(*LIMIT_MATCH=d)
|
(*LIMIT_MATCH=d)
|
||||||
(*LIMIT_DEPTH=d)
|
(*LIMIT_DEPTH=d)
|
||||||
.sp
|
.sp
|
||||||
|
@ -165,11 +167,13 @@ setting of one of these limits, the lower value is used.
|
||||||
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
||||||
still recognized for backwards compatibility.
|
still recognized for backwards compatibility.
|
||||||
.P
|
.P
|
||||||
The match limit is used (but in a different way) when JIT is being used, but it
|
The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
|
||||||
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
|
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||||
However, the depth limit is relevant for DFA matching, which uses function
|
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||||
recursion for recursions within the pattern. In this case, the depth limit
|
ignored, when matching with \fBpcre2_dfa_match()\fP. The depth limit is ignored
|
||||||
controls the amount of system stack that is used.
|
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||||
|
recursions within the pattern. In this case, the depth limit controls the
|
||||||
|
amount of system stack that is used.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="newlines"></a>
|
.\" HTML <a name="newlines"></a>
|
||||||
|
@ -3465,6 +3469,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 03 April 2017
|
Last updated: 11 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2PERFORM 3 "31 March 2017" "PCRE2 10.30"
|
.TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "PCRE2 PERFORMANCE"
|
.SH "PCRE2 PERFORMANCE"
|
||||||
|
@ -69,11 +69,12 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
|
||||||
uses very little system stack at run time. In earlier releases recursive
|
uses very little system stack at run time. In earlier releases recursive
|
||||||
function calls could use a great deal of stack, and this could cause problems,
|
function calls could use a great deal of stack, and this could cause problems,
|
||||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||||
frames is allocated on the system stack (enough for about 50 frames for small
|
frames is allocated on the system stack (enough for about 100 frames for small
|
||||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||||
to be time-efficient, as described below, may also reduce the memory
|
memory can be limited; if the limit is set to zero, only the initial stack
|
||||||
requirements.
|
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||||
|
may also reduce the memory requirements.
|
||||||
.P
|
.P
|
||||||
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
|
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
|
||||||
function calls, but only for processing atomic groups, lookaround assertions,
|
function calls, but only for processing atomic groups, lookaround assertions,
|
||||||
|
@ -231,6 +232,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 31 March 2017
|
Last updated: 08 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "08 April 2017" "PCRE 10.30"
|
.TH PCRE2TEST 1 "11 April 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -1063,6 +1063,7 @@ pattern.
|
||||||
get=<number or name> extract captured substring
|
get=<number or name> extract captured substring
|
||||||
getall extract all captured substrings
|
getall extract all captured substrings
|
||||||
/g global global matching
|
/g global global matching
|
||||||
|
heap_limit=<n> set a limit on heap memory
|
||||||
jitstack=<n> set size of JIT stack
|
jitstack=<n> set size of JIT stack
|
||||||
mark show mark values
|
mark show mark values
|
||||||
match_limit=<n> set a match limit
|
match_limit=<n> set a match limit
|
||||||
|
@ -1293,11 +1294,11 @@ stack that is larger than the default 32K is necessary only for very
|
||||||
complicated patterns.
|
complicated patterns.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.SS "Setting match and depth limits"
|
.SS "Setting heap, match, and depth limits"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The \fBmatch_limit\fP and \fBdepth_limit\fP modifiers set the appropriate
|
The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
|
||||||
limits in the match context. These values are ignored when the
|
the appropriate limits in the match context. These values are ignored when the
|
||||||
\fBfind_limits\fP modifier is specified.
|
\fBfind_limits\fP modifier is specified.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -1306,8 +1307,8 @@ limits in the match context. These values are ignored when the
|
||||||
.sp
|
.sp
|
||||||
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
|
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
|
||||||
calls the relevant matching function several times, setting different values in
|
calls the relevant matching function several times, setting different values in
|
||||||
the match context via \fBpcre2_set_match_limit()\fP or
|
the match context via \fBpcre2_set_heap_limit()\fP, \fBpcre2_set_match_limit()\fP,
|
||||||
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||||
parameter that allows the match to complete without error.
|
parameter that allows the match to complete without error.
|
||||||
.P
|
.P
|
||||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||||
|
@ -1320,8 +1321,8 @@ numbers of matching possibilities, it can become large very quickly with
|
||||||
increasing length of subject string.
|
increasing length of subject string.
|
||||||
.P
|
.P
|
||||||
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
||||||
much memory for recording backtracking points is needed to complete the match
|
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||||
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||||
recursive calls of the internal function that is used for handling pattern
|
recursive calls of the internal function that is used for handling pattern
|
||||||
recursion, lookaround assertions, and atomic groups.
|
recursion, lookaround assertions, and atomic groups.
|
||||||
.
|
.
|
||||||
|
@ -1782,6 +1783,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 08 April 2017
|
Last updated: 11 April 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -132,6 +132,10 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* Define to 1 if you have the <zlib.h> header file. */
|
/* Define to 1 if you have the <zlib.h> header file. */
|
||||||
#undef HAVE_ZLIB_H
|
#undef HAVE_ZLIB_H
|
||||||
|
|
||||||
|
/* This limits the amount of memory that pcre2_match() may use while matching
|
||||||
|
a pattern. The value is in kilobytes. */
|
||||||
|
#undef HEAP_LIMIT
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||||
as offsets within the compiled regex. The default is 2, which allows for
|
as offsets within the compiled regex. The default is 2, which allows for
|
||||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||||
|
@ -143,7 +147,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#undef LT_OBJDIR
|
#undef LT_OBJDIR
|
||||||
|
|
||||||
/* The value of MATCH_LIMIT determines the default number of times the
|
/* The value of MATCH_LIMIT determines the default number of times the
|
||||||
internal match() function can record a backtrack position during a single
|
pcre2_match() function can record a backtrack position during a single
|
||||||
matching attempt. There is a runtime interface for setting a different
|
matching attempt. There is a runtime interface for setting a different
|
||||||
limit. The limit exists in order to catch runaway regular expressions that
|
limit. The limit exists in order to catch runaway regular expressions that
|
||||||
take for ever to determine that they do not match. The default is set very
|
take for ever to determine that they do not match. The default is set very
|
||||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||||
|
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_INFO_SIZE 22
|
#define PCRE2_INFO_SIZE 22
|
||||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||||
#define PCRE2_INFO_FRAMESIZE 24
|
#define PCRE2_INFO_FRAMESIZE 24
|
||||||
|
#define PCRE2_INFO_HEAPLIMIT 25
|
||||||
|
|
||||||
/* Request types for pcre2_config(). */
|
/* Request types for pcre2_config(). */
|
||||||
|
|
||||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_CONFIG_UNICODE 9
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
#define PCRE2_CONFIG_VERSION 11
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
||||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
int (*)(pcre2_callout_block *, void *), void *); \
|
int (*)(pcre2_callout_block *, void *), void *); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
|
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||||
|
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_INFO_SIZE 22
|
#define PCRE2_INFO_SIZE 22
|
||||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||||
#define PCRE2_INFO_FRAMESIZE 24
|
#define PCRE2_INFO_FRAMESIZE 24
|
||||||
|
#define PCRE2_INFO_HEAPLIMIT 25
|
||||||
|
|
||||||
/* Request types for pcre2_config(). */
|
/* Request types for pcre2_config(). */
|
||||||
|
|
||||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
||||||
#define PCRE2_CONFIG_UNICODE 9
|
#define PCRE2_CONFIG_UNICODE 9
|
||||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||||
#define PCRE2_CONFIG_VERSION 11
|
#define PCRE2_CONFIG_VERSION 11
|
||||||
|
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||||
|
|
||||||
/* Types for code units in patterns and subject strings. */
|
/* Types for code units in patterns and subject strings. */
|
||||||
|
|
||||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
int (*)(pcre2_callout_block *, void *), void *); \
|
int (*)(pcre2_callout_block *, void *), void *); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
|
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||||
|
|
|
@ -727,6 +727,7 @@ enum { PSO_OPT, /* Value is an option bit */
|
||||||
PSO_FLG, /* Value is a flag bit */
|
PSO_FLG, /* Value is a flag bit */
|
||||||
PSO_NL, /* Value is a newline type */
|
PSO_NL, /* Value is a newline type */
|
||||||
PSO_BSR, /* Value is a \R type */
|
PSO_BSR, /* Value is a \R type */
|
||||||
|
PSO_LIMH, /* Read integer value for heap limit */
|
||||||
PSO_LIMM, /* Read integer value for match limit */
|
PSO_LIMM, /* Read integer value for match limit */
|
||||||
PSO_LIMD }; /* Read integer value for depth limit */
|
PSO_LIMD }; /* Read integer value for depth limit */
|
||||||
|
|
||||||
|
@ -749,6 +750,7 @@ static pso pso_list[] = {
|
||||||
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
|
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
|
||||||
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
|
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
|
||||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||||
|
{ (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
|
||||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||||
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
|
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
|
||||||
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
|
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
|
||||||
|
@ -8853,6 +8855,7 @@ uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
||||||
uint32_t setflags = 0; /* NL and BSR set flags */
|
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||||
|
|
||||||
uint32_t skipatstart; /* When checking (*UTF) etc */
|
uint32_t skipatstart; /* When checking (*UTF) etc */
|
||||||
|
uint32_t limit_heap = UINT32_MAX;
|
||||||
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
||||||
uint32_t limit_depth = UINT32_MAX;
|
uint32_t limit_depth = UINT32_MAX;
|
||||||
|
|
||||||
|
@ -9026,6 +9029,7 @@ while (patlen - skipatstart >= 2 &&
|
||||||
|
|
||||||
case PSO_LIMM:
|
case PSO_LIMM:
|
||||||
case PSO_LIMD:
|
case PSO_LIMD:
|
||||||
|
case PSO_LIMH:
|
||||||
c = 0;
|
c = 0;
|
||||||
pp = skipatstart;
|
pp = skipatstart;
|
||||||
if (!IS_DIGIT(ptr[pp]))
|
if (!IS_DIGIT(ptr[pp]))
|
||||||
|
@ -9045,7 +9049,8 @@ while (patlen - skipatstart >= 2 &&
|
||||||
ptr += pp;
|
ptr += pp;
|
||||||
goto HAD_EARLY_ERROR;
|
goto HAD_EARLY_ERROR;
|
||||||
}
|
}
|
||||||
if (p->type == PSO_LIMM) limit_match = c;
|
if (p->type == PSO_LIMH) limit_heap = c;
|
||||||
|
else if (p->type == PSO_LIMM) limit_match = c;
|
||||||
else limit_depth = c;
|
else limit_depth = c;
|
||||||
skipatstart += pp - skipatstart;
|
skipatstart += pp - skipatstart;
|
||||||
break;
|
break;
|
||||||
|
@ -9288,6 +9293,7 @@ re->magic_number = MAGIC_NUMBER;
|
||||||
re->compile_options = options;
|
re->compile_options = options;
|
||||||
re->overall_options = cb.external_options;
|
re->overall_options = cb.external_options;
|
||||||
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
|
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
|
||||||
|
re->limit_heap = limit_heap;
|
||||||
re->limit_match = limit_match;
|
re->limit_match = limit_match;
|
||||||
re->limit_depth = limit_depth;
|
re->limit_depth = limit_depth;
|
||||||
re->first_codeunit = 0;
|
re->first_codeunit = 0;
|
||||||
|
|
|
@ -84,6 +84,7 @@ if (where == NULL) /* Requests a length */
|
||||||
return PCRE2_ERROR_BADOPTION;
|
return PCRE2_ERROR_BADOPTION;
|
||||||
|
|
||||||
case PCRE2_CONFIG_BSR:
|
case PCRE2_CONFIG_BSR:
|
||||||
|
case PCRE2_CONFIG_HEAPLIMIT:
|
||||||
case PCRE2_CONFIG_JIT:
|
case PCRE2_CONFIG_JIT:
|
||||||
case PCRE2_CONFIG_LINKSIZE:
|
case PCRE2_CONFIG_LINKSIZE:
|
||||||
case PCRE2_CONFIG_MATCHLIMIT:
|
case PCRE2_CONFIG_MATCHLIMIT:
|
||||||
|
@ -116,6 +117,10 @@ switch (what)
|
||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE2_CONFIG_HEAPLIMIT:
|
||||||
|
*((uint32_t *)where) = HEAP_LIMIT;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE2_CONFIG_JIT:
|
case PCRE2_CONFIG_JIT:
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
*((uint32_t *)where) = 1;
|
*((uint32_t *)where) = 1;
|
||||||
|
|
|
@ -168,6 +168,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
PCRE2_UNSET, /* Offset limit */
|
PCRE2_UNSET, /* Offset limit */
|
||||||
|
HEAP_LIMIT,
|
||||||
MATCH_LIMIT,
|
MATCH_LIMIT,
|
||||||
MATCH_LIMIT_DEPTH };
|
MATCH_LIMIT_DEPTH };
|
||||||
|
|
||||||
|
@ -346,6 +347,13 @@ mcontext->callout_data = callout_data;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
|
{
|
||||||
|
mcontext->heap_limit = limit;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||||
{
|
{
|
||||||
|
|
|
@ -256,6 +256,7 @@ static const unsigned char match_error_texts[] =
|
||||||
"match with end before start is not supported\0"
|
"match with end before start is not supported\0"
|
||||||
"too many replacements (more than INT_MAX)\0"
|
"too many replacements (more than INT_MAX)\0"
|
||||||
"bad serialized data\0"
|
"bad serialized data\0"
|
||||||
|
"heap limit exceeded\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -240,6 +240,16 @@ not rely on this. */
|
||||||
|
|
||||||
#define COMPILE_ERROR_BASE 100
|
#define COMPILE_ERROR_BASE 100
|
||||||
|
|
||||||
|
/* The initial frames vector for remembering backtracking points in
|
||||||
|
pcre2_match() is allocated on the system stack, of this size (bytes). The size
|
||||||
|
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
|
||||||
|
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
|
||||||
|
on the number of capturing parentheses) so 20K handles quite a few frames. A
|
||||||
|
larger vector on the heap is obtained for patterns that need more frames. The
|
||||||
|
maximum size of this can be limited. */
|
||||||
|
|
||||||
|
#define START_FRAMES_SIZE 20480
|
||||||
|
|
||||||
/* Define the default BSR convention. */
|
/* Define the default BSR convention. */
|
||||||
|
|
||||||
#ifdef BSR_ANYCRLF
|
#ifdef BSR_ANYCRLF
|
||||||
|
@ -922,6 +932,7 @@ a positive value. */
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||||
|
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||||
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||||
|
@ -1196,6 +1207,7 @@ only. */
|
||||||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||||
|
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||||
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||||
|
|
|
@ -585,6 +585,7 @@ typedef struct pcre2_real_match_context {
|
||||||
int (*callout)(pcre2_callout_block *, void *);
|
int (*callout)(pcre2_callout_block *, void *);
|
||||||
void *callout_data;
|
void *callout_data;
|
||||||
PCRE2_SIZE offset_limit;
|
PCRE2_SIZE offset_limit;
|
||||||
|
uint32_t heap_limit;
|
||||||
uint32_t match_limit;
|
uint32_t match_limit;
|
||||||
uint32_t depth_limit;
|
uint32_t depth_limit;
|
||||||
} pcre2_real_match_context;
|
} pcre2_real_match_context;
|
||||||
|
@ -614,6 +615,7 @@ typedef struct pcre2_real_code {
|
||||||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||||
uint32_t overall_options; /* Options after processing the pattern */
|
uint32_t overall_options; /* Options after processing the pattern */
|
||||||
uint32_t flags; /* Various state flags */
|
uint32_t flags; /* Various state flags */
|
||||||
|
uint32_t limit_heap; /* Limit set in the pattern */
|
||||||
uint32_t limit_match; /* Limit set in the pattern */
|
uint32_t limit_match; /* Limit set in the pattern */
|
||||||
uint32_t limit_depth; /* Limit set in the pattern */
|
uint32_t limit_depth; /* Limit set in the pattern */
|
||||||
uint32_t first_codeunit; /* Starting code unit */
|
uint32_t first_codeunit; /* Starting code unit */
|
||||||
|
@ -808,9 +810,10 @@ typedef struct match_block {
|
||||||
heapframe *match_frames; /* Points to vector of frames */
|
heapframe *match_frames; /* Points to vector of frames */
|
||||||
heapframe *match_frames_top; /* Points after the end of the vector */
|
heapframe *match_frames_top; /* Points after the end of the vector */
|
||||||
heapframe *stack_frames; /* The original vector on the stack */
|
heapframe *stack_frames; /* The original vector on the stack */
|
||||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
PCRE2_SIZE heap_limit; /* As it says */
|
||||||
uint32_t match_limit; /* As it says */
|
uint32_t match_limit; /* As it says */
|
||||||
uint32_t match_limit_depth; /* As it says */
|
uint32_t match_limit_depth; /* As it says */
|
||||||
|
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||||
BOOL hitend; /* Hit the end of the subject at some point */
|
BOOL hitend; /* Hit the end of the subject at some point */
|
||||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||||
const uint8_t *lcc; /* Points to lower casing table */
|
const uint8_t *lcc; /* Points to lower casing table */
|
||||||
|
|
|
@ -64,15 +64,6 @@ information, and fields within it. */
|
||||||
|
|
||||||
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
|
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
|
||||||
|
|
||||||
/* The initial frames vector for remembering backtracking points is allocated
|
|
||||||
on the system stack, of this size (bytes). The size must be a multiple of
|
|
||||||
sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best.
|
|
||||||
Typical frame sizes are a few hundred bytes (it depends on the number of
|
|
||||||
capturing parentheses) so 10K handles quite a few frames. A larger vector on
|
|
||||||
the heap is obtained for patterns that need more frames. */
|
|
||||||
|
|
||||||
#define START_FRAMES_SIZE 10240
|
|
||||||
|
|
||||||
/* Masks for identifying the public options that are permitted at match time. */
|
/* Masks for identifying the public options that are permitted at match time. */
|
||||||
|
|
||||||
#define PUBLIC_MATCH_OPTIONS \
|
#define PUBLIC_MATCH_OPTIONS \
|
||||||
|
@ -618,14 +609,22 @@ backtracking point. */
|
||||||
MATCH_RECURSE:
|
MATCH_RECURSE:
|
||||||
|
|
||||||
/* Set up a new backtracking frame. If the vector is full, get a new one
|
/* Set up a new backtracking frame. If the vector is full, get a new one
|
||||||
on the heap, doubling the size. */
|
on the heap, doubling the size, but constrained by the heap limit. */
|
||||||
|
|
||||||
N = (heapframe *)((char *)F + frame_size);
|
N = (heapframe *)((char *)F + frame_size);
|
||||||
if (N >= mb->match_frames_top)
|
if (N >= mb->match_frames_top)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
|
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
|
||||||
heapframe *new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
heapframe *new;
|
||||||
|
|
||||||
|
if ((newsize / 1024) > mb->heap_limit)
|
||||||
|
{
|
||||||
|
PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||||
|
if (mb->frame_vector_size == maxsize) return PCRE2_ERROR_HEAPLIMIT;
|
||||||
|
newsize = maxsize;
|
||||||
|
}
|
||||||
|
|
||||||
|
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
||||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
memcpy(new, mb->match_frames, mb->frame_vector_size);
|
memcpy(new, mb->match_frames, mb->frame_vector_size);
|
||||||
|
|
||||||
|
@ -6266,9 +6265,22 @@ correct when calling match() more than once for non-anchored patterns. */
|
||||||
|
|
||||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
|
/* Limits set in the pattern override the match context only if they are
|
||||||
|
smaller. */
|
||||||
|
|
||||||
|
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
|
||||||
|
mcontext->heap_limit : re->limit_heap;
|
||||||
|
|
||||||
|
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||||
|
mcontext->match_limit : re->limit_match;
|
||||||
|
|
||||||
|
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
||||||
|
mcontext->depth_limit : re->limit_depth;
|
||||||
|
|
||||||
/* If a pattern has very many capturing parentheses, the frame size may be very
|
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||||
large. Ensure that there are at least 10 available frames by getting an initial
|
large. Ensure that there are at least 10 available frames by getting an initial
|
||||||
vector on the heap if necessary. */
|
vector on the heap if necessary, except when the heap limit prevents this. Get
|
||||||
|
fewer if possible. (The heap limit is in kilobytes.) */
|
||||||
|
|
||||||
if (frame_size <= START_FRAMES_SIZE/10)
|
if (frame_size <= START_FRAMES_SIZE/10)
|
||||||
{
|
{
|
||||||
|
@ -6278,6 +6290,11 @@ if (frame_size <= START_FRAMES_SIZE/10)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mb->frame_vector_size = frame_size * 10;
|
mb->frame_vector_size = frame_size * 10;
|
||||||
|
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
|
||||||
|
{
|
||||||
|
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
|
||||||
|
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||||
|
}
|
||||||
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||||
mb->memctl.memory_data);
|
mb->memctl.memory_data);
|
||||||
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||||
|
@ -6292,14 +6309,6 @@ to avoid uninitialized memory read errors when it is copied to a new frame. */
|
||||||
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||||
|
|
||||||
/* Limits set in the pattern override the match context only if they are
|
|
||||||
smaller. */
|
|
||||||
|
|
||||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
|
||||||
mcontext->match_limit : re->limit_match;
|
|
||||||
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
|
||||||
mcontext->depth_limit : re->limit_depth;
|
|
||||||
|
|
||||||
/* Pointers to the individual character tables */
|
/* Pointers to the individual character tables */
|
||||||
|
|
||||||
mb->lcc = re->tables + lcc_offset;
|
mb->lcc = re->tables + lcc_offset;
|
||||||
|
|
|
@ -80,6 +80,7 @@ if (where == NULL) /* Requests field length */
|
||||||
case PCRE2_INFO_FIRSTCODEUNIT:
|
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||||
case PCRE2_INFO_HASBACKSLASHC:
|
case PCRE2_INFO_HASBACKSLASHC:
|
||||||
case PCRE2_INFO_HASCRORLF:
|
case PCRE2_INFO_HASCRORLF:
|
||||||
|
case PCRE2_INFO_HEAPLIMIT:
|
||||||
case PCRE2_INFO_JCHANGED:
|
case PCRE2_INFO_JCHANGED:
|
||||||
case PCRE2_INFO_LASTCODETYPE:
|
case PCRE2_INFO_LASTCODETYPE:
|
||||||
case PCRE2_INFO_LASTCODEUNIT:
|
case PCRE2_INFO_LASTCODEUNIT:
|
||||||
|
@ -171,6 +172,11 @@ switch(what)
|
||||||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case PCRE2_INFO_HEAPLIMIT:
|
||||||
|
*((uint32_t *)where) = re->limit_heap;
|
||||||
|
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||||
|
break;
|
||||||
|
|
||||||
case PCRE2_INFO_JCHANGED:
|
case PCRE2_INFO_JCHANGED:
|
||||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -212,6 +212,7 @@ static const uint8_t *character_tables = NULL;
|
||||||
|
|
||||||
static uint32_t pcre2_options = 0;
|
static uint32_t pcre2_options = 0;
|
||||||
static uint32_t process_options = 0;
|
static uint32_t process_options = 0;
|
||||||
|
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
|
||||||
static uint32_t match_limit = 0;
|
static uint32_t match_limit = 0;
|
||||||
static uint32_t depth_limit = 0;
|
static uint32_t depth_limit = 0;
|
||||||
|
|
||||||
|
@ -330,7 +331,7 @@ static const char *incexname[4] = { "--include", "--exclude",
|
||||||
|
|
||||||
/* Structure for options and list of them */
|
/* Structure for options and list of them */
|
||||||
|
|
||||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
|
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
|
||||||
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
|
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
|
||||||
|
|
||||||
typedef struct option_item {
|
typedef struct option_item {
|
||||||
|
@ -356,16 +357,17 @@ used to identify them. */
|
||||||
#define N_LOFFSETS (-10)
|
#define N_LOFFSETS (-10)
|
||||||
#define N_FOFFSETS (-11)
|
#define N_FOFFSETS (-11)
|
||||||
#define N_LBUFFER (-12)
|
#define N_LBUFFER (-12)
|
||||||
#define N_M_LIMIT (-13)
|
#define N_H_LIMIT (-13)
|
||||||
#define N_M_LIMIT_DEP (-14)
|
#define N_M_LIMIT (-14)
|
||||||
#define N_BUFSIZE (-15)
|
#define N_M_LIMIT_DEP (-15)
|
||||||
#define N_NOJIT (-16)
|
#define N_BUFSIZE (-16)
|
||||||
#define N_FILE_LIST (-17)
|
#define N_NOJIT (-17)
|
||||||
#define N_BINARY_FILES (-18)
|
#define N_FILE_LIST (-18)
|
||||||
#define N_EXCLUDE_FROM (-19)
|
#define N_BINARY_FILES (-19)
|
||||||
#define N_INCLUDE_FROM (-20)
|
#define N_EXCLUDE_FROM (-20)
|
||||||
#define N_OM_SEPARATOR (-21)
|
#define N_INCLUDE_FROM (-21)
|
||||||
#define N_MAX_BUFSIZE (-22)
|
#define N_OM_SEPARATOR (-22)
|
||||||
|
#define N_MAX_BUFSIZE (-23)
|
||||||
|
|
||||||
static option_item optionlist[] = {
|
static option_item optionlist[] = {
|
||||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||||
|
@ -397,6 +399,7 @@ static option_item optionlist[] = {
|
||||||
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
|
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
|
||||||
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
|
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
|
||||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||||
|
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
|
||||||
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
|
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
|
||||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
||||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
||||||
|
@ -525,9 +528,9 @@ pcre2grep_exit(int rc)
|
||||||
{
|
{
|
||||||
if (resource_error)
|
if (resource_error)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
|
||||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
"limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||||
PCRE2_ERROR_DEPTHLIMIT);
|
PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
|
||||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||||
}
|
}
|
||||||
exit(rc);
|
exit(rc);
|
||||||
|
@ -1647,7 +1650,7 @@ for (i = 1; p != NULL; p = p->next, i++)
|
||||||
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
|
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
|
||||||
fprintf(stderr, "\n\n");
|
fprintf(stderr, "\n\n");
|
||||||
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
|
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
|
||||||
*mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
*mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||||
resource_error = TRUE;
|
resource_error = TRUE;
|
||||||
if (error_count++ > 20)
|
if (error_count++ > 20)
|
||||||
{
|
{
|
||||||
|
@ -3796,7 +3799,7 @@ for (i = 1; i < argc; i++)
|
||||||
/* Otherwise, deal with a single string or numeric data value. */
|
/* Otherwise, deal with a single string or numeric data value. */
|
||||||
|
|
||||||
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
|
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
|
||||||
op->type != OP_OP_NUMBER)
|
op->type != OP_OP_NUMBER && op->type != OP_SIZE)
|
||||||
{
|
{
|
||||||
*((char **)op->dataptr) = option_data;
|
*((char **)op->dataptr) = option_data;
|
||||||
}
|
}
|
||||||
|
@ -3804,6 +3807,7 @@ for (i = 1; i < argc; i++)
|
||||||
{
|
{
|
||||||
unsigned long int n = decode_number(option_data, op, longop);
|
unsigned long int n = decode_number(option_data, op, longop);
|
||||||
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
|
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
|
||||||
|
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
|
||||||
else *((int *)op->dataptr) = n;
|
else *((int *)op->dataptr) = n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3839,6 +3843,7 @@ if (output_text != NULL &&
|
||||||
|
|
||||||
/* Put limits into the match data block. */
|
/* Put limits into the match data block. */
|
||||||
|
|
||||||
|
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
|
||||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||||
|
|
||||||
|
|
|
@ -588,6 +588,7 @@ static modstruct modlist[] = {
|
||||||
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
||||||
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
|
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
|
||||||
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
|
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
|
||||||
|
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
|
||||||
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
|
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
|
||||||
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
||||||
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
|
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
|
||||||
|
@ -1207,6 +1208,14 @@ are supported. */
|
||||||
else \
|
else \
|
||||||
pcre2_set_depth_limit_32(G(a,32),b)
|
pcre2_set_depth_limit_32(G(a,32),b)
|
||||||
|
|
||||||
|
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||||
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
pcre2_set_heap_limit_8(G(a,8),b); \
|
||||||
|
else if (test_mode == PCRE16_MODE) \
|
||||||
|
pcre2_set_heap_limit_16(G(a,16),b); \
|
||||||
|
else \
|
||||||
|
pcre2_set_heap_limit_32(G(a,32),b)
|
||||||
|
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
pcre2_set_match_limit_8(G(a,8),b); \
|
pcre2_set_match_limit_8(G(a,8),b); \
|
||||||
|
@ -1643,6 +1652,12 @@ the three different cases. */
|
||||||
else \
|
else \
|
||||||
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
|
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
|
||||||
|
|
||||||
|
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||||
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
|
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
|
||||||
|
else \
|
||||||
|
G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
|
||||||
|
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
|
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
|
||||||
|
@ -1856,6 +1871,7 @@ the three different cases. */
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
|
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
|
||||||
|
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||||
|
@ -1952,6 +1968,7 @@ the three different cases. */
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
|
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
|
||||||
|
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||||
|
@ -2048,6 +2065,7 @@ the three different cases. */
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
|
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
|
||||||
|
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||||
|
@ -4040,14 +4058,28 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
{
|
{
|
||||||
void *nametable;
|
void *nametable;
|
||||||
uint8_t *start_bits;
|
uint8_t *start_bits;
|
||||||
BOOL match_limit_set, depth_limit_set;
|
BOOL heap_limit_set, match_limit_set, depth_limit_set;
|
||||||
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
||||||
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
|
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
|
||||||
match_limit, minlength, nameentrysize, namecount, newline_convention,
|
depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
|
||||||
depth_limit;
|
newline_convention;
|
||||||
|
|
||||||
/* These info requests may return PCRE2_ERROR_UNSET. */
|
/* These info requests may return PCRE2_ERROR_UNSET. */
|
||||||
|
|
||||||
|
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
heap_limit_set = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PCRE2_ERROR_UNSET:
|
||||||
|
heap_limit_set = FALSE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return PR_ABEND;
|
||||||
|
}
|
||||||
|
|
||||||
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
|
@ -4106,6 +4138,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
||||||
if (maxlookbehind > 0)
|
if (maxlookbehind > 0)
|
||||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||||
|
|
||||||
|
if (heap_limit_set)
|
||||||
|
fprintf(outfile, "Heap limit = %u\n", heap_limit);
|
||||||
|
|
||||||
if (match_limit_set)
|
if (match_limit_set)
|
||||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||||
|
|
||||||
|
@ -5353,10 +5388,15 @@ uint32_t max = UINT32_MAX;
|
||||||
|
|
||||||
PCRE2_SET_MATCH_LIMIT(dat_context, max);
|
PCRE2_SET_MATCH_LIMIT(dat_context, max);
|
||||||
PCRE2_SET_DEPTH_LIMIT(dat_context, max);
|
PCRE2_SET_DEPTH_LIMIT(dat_context, max);
|
||||||
|
PCRE2_SET_HEAP_LIMIT(dat_context, max);
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
if (errnumber == PCRE2_ERROR_HEAPLIMIT)
|
||||||
|
{
|
||||||
|
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
|
||||||
|
}
|
||||||
|
else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
||||||
{
|
{
|
||||||
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
|
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
|
||||||
}
|
}
|
||||||
|
@ -5393,13 +5433,23 @@ for (;;)
|
||||||
capcount == PCRE2_ERROR_NOMATCH ||
|
capcount == PCRE2_ERROR_NOMATCH ||
|
||||||
capcount == PCRE2_ERROR_PARTIAL)
|
capcount == PCRE2_ERROR_PARTIAL)
|
||||||
{
|
{
|
||||||
|
/* If we've not hit the error with a heap limit less than the size of the
|
||||||
|
initial stack frame vector, the heap is not being used, so the minimum
|
||||||
|
limit is zero; there's no need to go on. The other limits are always
|
||||||
|
greater than zero. */
|
||||||
|
|
||||||
|
if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "Minimum %s limit = 0\n", msg);
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (mid == min + 1)
|
if (mid == min + 1)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
max = mid;
|
max = mid;
|
||||||
mid = (min + mid)/2;
|
mid = (min + max)/2;
|
||||||
}
|
}
|
||||||
else break; /* Some other error */
|
else break; /* Some other error */
|
||||||
}
|
}
|
||||||
|
@ -6662,20 +6712,32 @@ else for (gmatched = 0;; gmatched++)
|
||||||
(double)CLOCKS_PER_SEC);
|
(double)CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Find the match and depth limits if requested. The match limit is not
|
/* Find the heap, match and depth limits if requested. The match and heap
|
||||||
relevant for DFA matching and the depth limit is not relevant for JIT. */
|
limits are not relevant for DFA matching and the depth limit is not relevant
|
||||||
|
for JIT. */
|
||||||
|
|
||||||
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
||||||
{
|
{
|
||||||
if ((dat_datctl.control & CTL_DFA) == 0)
|
if ((dat_datctl.control & CTL_DFA) == 0)
|
||||||
|
{
|
||||||
|
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||||
|
(dat_datctl.options & PCRE2_NO_JIT) != 0)
|
||||||
|
{
|
||||||
|
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT,
|
||||||
|
"heap");
|
||||||
|
}
|
||||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
|
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
|
||||||
"match");
|
"match");
|
||||||
|
}
|
||||||
else capcount = 0;
|
else capcount = 0;
|
||||||
|
|
||||||
if (FLD(compiled_code, executable_jit) == NULL ||
|
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||||
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
|
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
|
||||||
(dat_datctl.control & CTL_DFA) != 0)
|
(dat_datctl.control & CTL_DFA) != 0)
|
||||||
|
{
|
||||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
||||||
"depth");
|
"depth");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Otherwise just run a single match, setting up a callout if required (the
|
/* Otherwise just run a single match, setting up a callout if required (the
|
||||||
|
@ -7402,6 +7464,8 @@ printf(" \\C is supported\n");
|
||||||
printf(" Internal link size = %d\n", optval);
|
printf(" Internal link size = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
||||||
printf(" Parentheses nest limit = %d\n", optval);
|
printf(" Parentheses nest limit = %d\n", optval);
|
||||||
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
|
||||||
|
printf(" Default heap limit = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
|
||||||
printf(" Default match limit = %d\n", optval);
|
printf(" Default match limit = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
|
||||||
|
|
|
@ -12,11 +12,13 @@ Starting code units: a z
|
||||||
Last code unit = 'z'
|
Last code unit = 'z'
|
||||||
Subject length lower bound = 2
|
Subject length lower bound = 2
|
||||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 7
|
Minimum match limit = 7
|
||||||
Minimum depth limit = 7
|
Minimum depth limit = 7
|
||||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||||
aaaaaaaaaaaaaz\=find_limits
|
aaaaaaaaaaaaaz\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 20481
|
Minimum match limit = 20481
|
||||||
Minimum depth limit = 30
|
Minimum depth limit = 30
|
||||||
No match
|
No match
|
||||||
|
@ -26,6 +28,7 @@ Capturing subpattern count = 1
|
||||||
May match empty string
|
May match empty string
|
||||||
Subject length lower bound = 0
|
Subject length lower bound = 0
|
||||||
/* this is a C style comment */\=find_limits
|
/* this is a C style comment */\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 64
|
Minimum match limit = 64
|
||||||
Minimum depth limit = 7
|
Minimum depth limit = 7
|
||||||
0: /* this is a C style comment */
|
0: /* this is a C style comment */
|
||||||
|
@ -33,21 +36,25 @@ Minimum depth limit = 7
|
||||||
|
|
||||||
/^(?>a)++/
|
/^(?>a)++/
|
||||||
aa\=find_limits
|
aa\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 5
|
Minimum match limit = 5
|
||||||
Minimum depth limit = 3
|
Minimum depth limit = 3
|
||||||
0: aa
|
0: aa
|
||||||
aaaaaaaaa\=find_limits
|
aaaaaaaaa\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 12
|
Minimum match limit = 12
|
||||||
Minimum depth limit = 3
|
Minimum depth limit = 3
|
||||||
0: aaaaaaaaa
|
0: aaaaaaaaa
|
||||||
|
|
||||||
/(a)(?1)++/
|
/(a)(?1)++/
|
||||||
aa\=find_limits
|
aa\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 7
|
Minimum match limit = 7
|
||||||
Minimum depth limit = 5
|
Minimum depth limit = 5
|
||||||
0: aa
|
0: aa
|
||||||
1: a
|
1: a
|
||||||
aaaaaaaaa\=find_limits
|
aaaaaaaaa\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 21
|
Minimum match limit = 21
|
||||||
Minimum depth limit = 5
|
Minimum depth limit = 5
|
||||||
0: aaaaaaaaa
|
0: aaaaaaaaa
|
||||||
|
@ -55,30 +62,35 @@ Minimum depth limit = 5
|
||||||
|
|
||||||
/a(?:.)*?a/ims
|
/a(?:.)*?a/ims
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 24
|
Minimum match limit = 24
|
||||||
Minimum depth limit = 3
|
Minimum depth limit = 3
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
/a(?:.(*THEN))*?a/ims
|
/a(?:.(*THEN))*?a/ims
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 66
|
Minimum match limit = 66
|
||||||
Minimum depth limit = 45
|
Minimum depth limit = 45
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
/a(?:.(*THEN:ABC))*?a/ims
|
/a(?:.(*THEN:ABC))*?a/ims
|
||||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 66
|
Minimum match limit = 66
|
||||||
Minimum depth limit = 45
|
Minimum depth limit = 45
|
||||||
0: abbbbbbbbbbbbbbbbbbbbba
|
0: abbbbbbbbbbbbbbbbbbbbba
|
||||||
|
|
||||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||||
aabbccddee\=find_limits
|
aabbccddee\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 7
|
Minimum match limit = 7
|
||||||
Minimum depth limit = 7
|
Minimum depth limit = 7
|
||||||
0: aabbccddee
|
0: aabbccddee
|
||||||
|
|
||||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||||
aabbccddee\=find_limits
|
aabbccddee\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 12
|
Minimum match limit = 12
|
||||||
Minimum depth limit = 12
|
Minimum depth limit = 12
|
||||||
0: aabbccddee
|
0: aabbccddee
|
||||||
|
@ -90,6 +102,7 @@ Minimum depth limit = 12
|
||||||
|
|
||||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||||
aabbccddee\=find_limits
|
aabbccddee\=find_limits
|
||||||
|
Minimum heap limit = 0
|
||||||
Minimum match limit = 10
|
Minimum match limit = 10
|
||||||
Minimum depth limit = 10
|
Minimum depth limit = 10
|
||||||
0: aabbccddee
|
0: aabbccddee
|
||||||
|
|
|
@ -15609,7 +15609,7 @@ Last code unit = 'c'
|
||||||
Subject length lower bound = 4
|
Subject length lower bound = 4
|
||||||
|
|
||||||
# End of testinput2
|
# End of testinput2
|
||||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error -62: bad serialized data
|
Error -62: bad serialized data
|
||||||
Error -2: partial match
|
Error -2: partial match
|
||||||
Error -1: no match
|
Error -1: no match
|
||||||
|
|
Loading…
Reference in New Issue