Add explicit heap limiting options to pcre2_match(), with associated features

for listing, configuring, etc.
This commit is contained in:
Philip.Hazel 2017-04-11 11:47:25 +00:00
parent f0126dc7ae
commit 14989bd454
47 changed files with 2322 additions and 1778 deletions

View File

@ -78,6 +78,7 @@
# fix by David Gaussmann # fix by David Gaussmann
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE # 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30 # 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
# 2017-04-08 PH added HEAP_LIMIT
PROJECT(PCRE2 C) PROJECT(PCRE2 C)
@ -143,6 +144,9 @@ SET(PCRE2_LINK_SIZE "2" CACHE STRING
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.") "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
"Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.") "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@ -765,6 +769,7 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}") MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}") MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}") MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}") MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}") MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}") MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")

View File

@ -121,6 +121,11 @@ single-branch conditions with a false condition (e.g. DEFINE) at the start of a
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
anchored. anchored.
22. Added an explicit limit on the amount of heap used by pcre2_match(), set by
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). Upgraded pcre2test to show the
heap limit along with other pattern information, and to find the minimum when
the find_limits modifier is set.
Version 10.23 14-February-2017 Version 10.23 14-February-2017
------------------------------ ------------------------------

View File

@ -69,6 +69,7 @@ dist_html_DATA = \
doc/html/pcre2_set_character_tables.html \ doc/html/pcre2_set_character_tables.html \
doc/html/pcre2_set_compile_recursion_guard.html \ doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_depth_limit.html \ doc/html/pcre2_set_depth_limit.html \
doc/html/pcre2_set_heap_limit.html \
doc/html/pcre2_set_match_limit.html \ doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_max_pattern_length.html \ doc/html/pcre2_set_max_pattern_length.html \
doc/html/pcre2_set_offset_limit.html \ doc/html/pcre2_set_offset_limit.html \
@ -152,6 +153,7 @@ dist_man_MANS = \
doc/pcre2_set_character_tables.3 \ doc/pcre2_set_character_tables.3 \
doc/pcre2_set_compile_recursion_guard.3 \ doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_depth_limit.3 \ doc/pcre2_set_depth_limit.3 \
doc/pcre2_set_heap_limit.3 \
doc/pcre2_set_match_limit.3 \ doc/pcre2_set_match_limit.3 \
doc/pcre2_set_max_pattern_length.3 \ doc/pcre2_set_max_pattern_length.3 \
doc/pcre2_set_offset_limit.3 \ doc/pcre2_set_offset_limit.3 \

23
README
View File

@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
--with-parens-nest-limit=500 --with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses . PCRE2 has a counter that can be set to limit the amount of computing resource
when matching a pattern. If the limit is exceeded during a match, the match it uses when matching a pattern with the Perl-compatible matching function.
fails. The default is ten million. You can change the default by setting, for If the limit is exceeded during a match, the match fails. The default is ten
example, million. You can change the default by setting, for example,
--with-match-limit=500000 --with-match-limit=500000
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
pcre2api man page (search for pcre2_set_match_limit). pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking . There is a separate counter that limits the depth of nested backtracking
during a matching process, which in turn limits the amount of memory that is during a matching process, which indirectly limits the amount of heap memory
used. This also has a default of ten million, which is essentially that is used. This also has a default of ten million, which is essentially
"unlimited". You can change the default by setting, for example, "unlimited". You can change the default by setting, for example,
--with-match-limit-depth=5000 --with-match-limit-depth=5000
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
There is more discussion in the pcre2api man page (search for There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit). pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() interpreter:
--with-heap-limit=500
The units are kilobytes. This limit does not apply when the JIT optimization
(which has its own memory control features) is used. There is more discussion
on the pcre2api man page (search for pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around . In the 8-bit library, the default maximum compiled pattern size is around
64K bytes. You can increase this by adding --with-link-size=3 to the 64K bytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets "configure" command. PCRE2 then uses three bytes instead of two for offsets
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
Philip Hazel Philip Hazel
Email local part: ph10 Email local part: ph10
Email domain: cam.ac.uk Email domain: cam.ac.uk
Last updated: 17 March 2017 Last updated: 11 April 2017

View File

@ -489,7 +489,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then if [ $? = 0 ] ; then
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry $sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
checkresult $? 2 "$opt" checkresult $? 2 "$opt"
fi fi
done done

View File

@ -36,6 +36,7 @@
#cmakedefine NEVER_BACKSLASH_C 1 #cmakedefine NEVER_BACKSLASH_C 1
#define LINK_SIZE @PCRE2_LINK_SIZE@ #define LINK_SIZE @PCRE2_LINK_SIZE@
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@ #define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@ #define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@ #define NEWLINE_DEFAULT @NEWLINE_DEFAULT@

View File

@ -263,6 +263,12 @@ AC_ARG_WITH(parens-nest-limit,
[nested parentheses limit (default=250)]), [nested parentheses limit (default=250)]),
, with_parens_nest_limit=250) , with_parens_nest_limit=250)
# Handle --with-heap-limit=N
AC_ARG_WITH(heap-limit,
AS_HELP_STRING([--with-heap-limit=N],
[default limit on heap memory (kilobytes, default=20000000)]),
, with_heap_limit=20000000)
# Handle --with-match-limit=N # Handle --with-match-limit=N
AC_ARG_WITH(match-limit, AC_ARG_WITH(match-limit,
AS_HELP_STRING([--with-match-limit=N], AS_HELP_STRING([--with-match-limit=N],
@ -680,12 +686,12 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
stack that is used while compiling a pattern.]) stack that is used while compiling a pattern.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [ AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the internal The value of MATCH_LIMIT determines the default number of times the
match() function can record a backtrack position during a single matching pcre2_match() function can record a backtrack position during a single
attempt. There is a runtime interface for setting a different limit. The matching attempt. There is a runtime interface for setting a different limit.
limit exists in order to catch runaway regular expressions that take for ever The limit exists in order to catch runaway regular expressions that take for
to determine that they do not match. The default is set very large so that it ever to determine that they do not match. The default is set very large so
does not accidentally catch legitimate cases.]) that it does not accidentally catch legitimate cases.])
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth # --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
@ -694,7 +700,7 @@ cat <<EOF
WARNING: --with-match-limit-recursion is an obsolete option. Please use WARNING: --with-match-limit-recursion is an obsolete option. Please use
--with-match-limit-depth in future. If both are set, --with-match-limit-depth --with-match-limit-depth in future. If both are set, --with-match-limit-depth
will be used. will be used. See also --with-heap-limit.
EOF EOF
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
@ -711,6 +717,10 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
be less than the value of MATCH_LIMIT. The default is to use the same value be less than the value of MATCH_LIMIT. The default is to use the same value
as MATCH_LIMIT. There is a runtime method for setting a different limit.]) as MATCH_LIMIT. There is a runtime method for setting a different limit.])
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes.])
AC_DEFINE([MAX_NAME_SIZE], [32], [ AC_DEFINE([MAX_NAME_SIZE], [32], [
This limit is parameterized just in case anybody ever wants to This limit is parameterized just in case anybody ever wants to
change it. Care must be taken if it is increased, because it guards change it. Care must be taken if it is increased, because it guards
@ -971,6 +981,7 @@ $PACKAGE-$VERSION configuration summary:
Rebuild char tables ................ : ${enable_rebuild_chartables} Rebuild char tables ................ : ${enable_rebuild_chartables}
Internal link size ................. : ${with_link_size} Internal link size ................. : ${with_link_size}
Nested parentheses limit ........... : ${with_parens_nest_limit} Nested parentheses limit ........... : ${with_parens_nest_limit}
Heap limit ......................... : ${with_heap_limit} kilobytes
Match limit ........................ : ${with_match_limit} Match limit ........................ : ${with_match_limit}
Match depth limit .................. : ${with_match_limit_depth} Match depth limit .................. : ${with_match_limit_depth}
Build shared libs .................. : ${enable_shared} Build shared libs .................. : ${enable_shared}

View File

@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
--with-parens-nest-limit=500 --with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses . PCRE2 has a counter that can be set to limit the amount of computing resource
when matching a pattern. If the limit is exceeded during a match, the match it uses when matching a pattern with the Perl-compatible matching function.
fails. The default is ten million. You can change the default by setting, for If the limit is exceeded during a match, the match fails. The default is ten
example, million. You can change the default by setting, for example,
--with-match-limit=500000 --with-match-limit=500000
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
pcre2api man page (search for pcre2_set_match_limit). pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking . There is a separate counter that limits the depth of nested backtracking
during a matching process, which in turn limits the amount of memory that is during a matching process, which indirectly limits the amount of heap memory
used. This also has a default of ten million, which is essentially that is used. This also has a default of ten million, which is essentially
"unlimited". You can change the default by setting, for example, "unlimited". You can change the default by setting, for example,
--with-match-limit-depth=5000 --with-match-limit-depth=5000
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
There is more discussion in the pcre2api man page (search for There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit). pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() interpreter:
--with-heap-limit=500
The units are kilobytes. This limit does not apply when the JIT optimization
(which has its own memory control features) is used. There is more discussion
on the pcre2api man page (search for pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around . In the 8-bit library, the default maximum compiled pattern size is around
64K bytes. You can increase this by adding --with-link-size=3 to the 64K bytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets "configure" command. PCRE2 then uses three bytes instead of two for offsets
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
Philip Hazel Philip Hazel
Email local part: ph10 Email local part: ph10
Email domain: cam.ac.uk Email domain: cam.ac.uk
Last updated: 17 March 2017 Last updated: 11 April 2017

View File

@ -213,6 +213,9 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td> <tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr> <td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td> <tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr> <td>&nbsp;&nbsp;Set the match limit</td></tr>

View File

@ -45,6 +45,7 @@ point to a uint32_t integer variable. The available codes are:
PCRE2_CONFIG_BSR Indicates what \R matches by default: PCRE2_CONFIG_BSR Indicates what \R matches by default:
PCRE2_BSR_UNICODE PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no) PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler

View File

@ -44,6 +44,7 @@ A match context is needed only if you want to:
<pre> <pre>
Set up a callout function Set up a callout function
Set a matching offset limit Set a matching offset limit
Change the heap memory limit
Change the backtracking match limit Change the backtracking match limit
Change the backtracking depth limit Change the backtracking depth limit
Set custom memory management specifically for the match Set custom memory management specifically for the match

View File

@ -51,6 +51,7 @@ request are as follows:
PCRE2_INFO_FRAMESIZE Size of backtracking frame PCRE2_INFO_FRAMESIZE Size of backtracking frame
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0 PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODETYPE Type of must-be-present information PCRE2_INFO_LASTCODETYPE Type of must-be-present information

View File

@ -182,6 +182,10 @@ document for an overview of all the PCRE2 documentation.
<b> PCRE2_SIZE <i>value</i>);</b> <b> PCRE2_SIZE <i>value</i>);</b>
<br> <br>
<br> <br>
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b> <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b> <b> uint32_t <i>value</i>);</b>
<br> <br>
@ -793,6 +797,7 @@ A match context is required if you want to:
<pre> <pre>
Set up a callout function Set up a callout function
Set an offset limit for matching an unanchored pattern Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit Change the backtracking match limit
Change the backtracking depth limit Change the backtracking depth limit
Set custom memory management specifically for the match Set custom memory management specifically for the match
@ -851,14 +856,47 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
start within the first line of the subject. If this is set with an offset start within the first line of the subject. If this is set with an offset
limit, a match must occur in the first line and also within the offset limit. limit, a match must occur in the first line and also within the offset limit.
In other words, whichever limit comes first is used. In other words, whichever limit comes first is used.
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
information when running an interpretive match. This limit does not apply to
matching with the JIT optimization, which has its own memory control
arrangements (see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
returned. The default limit is set when PCRE2 is built; the default default is
very large and is essentially "unlimited".
</P>
<P>
A value for the heap limit may also be supplied by an item at the start of a
pattern of the form
<pre>
(*LIMIT_HEAP=ddd)
</pre>
where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
limit is set, less than the default.
</P>
<P>
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b> <b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b> <b> uint32_t <i>value</i>);</b>
<br> <br>
<br> <br>
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
up too many resources when processing patterns that are not going to match, but up too many computing resources when processing patterns that are not going to
which have a very large number of possibilities in their search trees. The match, but which have a very large number of possibilities in their search
classic example is a pattern that uses nested unlimited repeats. trees. The classic example is a pattern that uses nested unlimited repeats.
</P> </P>
<P> <P>
There is an internal counter in <b>pcre2_match()</b> that is incremented each There is an internal counter in <b>pcre2_match()</b> that is incremented each
@ -895,16 +933,20 @@ limit is set, less than the default.
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>. This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
Each time a nested backtracking point is passed, a new memory "frame" is used Each time a nested backtracking point is passed, a new memory "frame" is used
to remember the state of matching at that point. Thus, this parameter to remember the state of matching at that point. Thus, this parameter
indirectly limits the amount of memory that is used in a match. indirectly limits the amount of memory that is used in a match. However,
because the size of each memory "frame" depends on the number of capturing
parentheses, the actual memory limit varies from pattern to pattern. This limit
was more useful in versions before 10.30, where function recursion was used for
backtracking.
</P> </P>
<P> <P>
This limit is not relevant, and is ignored, when matching is done using JIT The depth limit is not relevant, and is ignored, when matching is done using
compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which uses JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
it to limit the depth of internal recursive function calls that implement uses it to limit the depth of internal recursive function calls that implement
lookaround assertions and pattern recursions. This is, therefore, an indirect atomic groups, lookaround assertions, and pattern recursions. This is,
limit on the amount of system stack that is used. A recursive pattern such as therefore, an indirect limit on the amount of system stack that is used. A
/(.)(?1)/, when matched to a very long string using <b>pcre2_dfa_match()</b>, recursive pattern such as /(.)(?1)/, when matched to a very long string using
can use a great deal of stack. <b>pcre2_dfa_match()</b>, can use a great deal of stack.
</P> </P>
<P> <P>
The default value for the depth limit can be set when PCRE2 is built; the The default value for the depth limit can be set when PCRE2 is built; the
@ -958,6 +1000,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
<b>pcre2_set_depth_limit()</b> above. <b>pcre2_set_depth_limit()</b> above.
<pre>
PCRE2_CONFIG_HEAPLIMIT
</pre>
The output is a uint32_t integer that gives, in kilobytes, the default limit
for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
given with <b>pcre2_set_heap_limit()</b> above.
<pre> <pre>
PCRE2_CONFIG_JIT PCRE2_CONFIG_JIT
</pre> </pre>
@ -1786,6 +1834,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
explicit match is either a literal CR or LF character, or \r or \n or one of explicit match is either a literal CR or LF character, or \r or \n or one of
the equivalent hexadecimal or octal escape sequences. the equivalent hexadecimal or octal escape sequences.
<pre>
PCRE2_INFO_HEAPLIMIT
</pre>
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
should point to an unsigned 32-bit integer. If no such value has been set, the
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
<pre> <pre>
PCRE2_INFO_JCHANGED PCRE2_INFO_JCHANGED
</pre> </pre>
@ -2554,7 +2609,8 @@ The backtracking match limit was reached.
</pre> </pre>
If a pattern contains many nested backtracking points, heap memory is used to If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default remember them. This error is given when the memory allocation function (default
or custom) fails. or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
if the amount of memory needed exceeds the heap limit.
<pre> <pre>
PCRE2_ERROR_NULL PCRE2_ERROR_NULL
</pre> </pre>
@ -3271,7 +3327,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br> <br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 04 April 2017 Last updated: 11 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -265,17 +265,41 @@ to the <b>configure</b> command. This setting has no effect on the
(though the counting is done differently). (though the counting is done differently).
</P> </P>
<P> <P>
In some environments it is desirable to limit the depth of nested backtracking The <b>pcre2_match()</b> function starts out using a 20K vector on the system
in order to restrict the maximum amount of heap memory that is used. A second stack to record backtracking points. The more nested backtracking points there
limit controls this; it defaults to the value that is set for are (that is, the deeper the search tree), the more memory is needed. If the
--with-match-limit. You can set a lower default limit by adding, for example, initial vector is not large enough, heap memory is used, up to a certain limit,
which is specified in kilobytes. The limit can be changed at run time, as
described in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as
<pre>
--with-heap-limit=500
</pre>
which limits the amount of heap to 500 kilobytes. This limit applies only to
interpretive matching in pcre2_match(). It does not apply when JIT (which has
its own memory arrangements) is used, nor does it apply to
<b>pcre2_dfa_match()</b>.
</P>
<P>
You can also explicitly limit the depth of nested backtracking in the
<b>pcre2_match()</b> interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding, for
example,
<pre> <pre>
--with-match-limit-depth=10000 --with-match-limit-depth=10000
</pre> </pre>
to the <b>configure</b> command. This value can also be overridden at run time. to the <b>configure</b> command. This value can be overridden at run time. This
As well as applying to <b>pcre2_match()</b>, this limit also controls the depth depth limit indirectly limits the amount of heap memory that is used, but
of recursive function calls in <b>pcre2_dfa_match()</b>. These are used for because the size of each backtracking "frame" depends on the number of
lookaround assertions, atomic groups, and recursion within patterns. capturing parentheses in a pattern, the amount of heap that is used before the
limit is reached varies from pattern to pattern. This limit was more useful in
versions before 10.30, where function recursion was used for backtracking.
However, as well as applying to <b>pcre2_match()</b>, this limit also controls
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
</P> </P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br> <br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P> <P>
@ -530,7 +554,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC25" href="#TOC1">REVISION</a><br> <br><a name="SEC25" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 31 March 2017 Last updated: 10 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -404,6 +404,10 @@ file name is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name. If a line number is also being output, it follows the file name.
</P> </P>
<P> <P>
<b>--heap-limit</b>=<i>number</i>
See <b>--match-limit</b> below.
</P>
<P>
<b>--help</b> <b>--help</b>
Output a help message, giving brief details of the command options and file Output a help message, giving brief details of the command options and file
type support, and then exit. Anything else on the command line is type support, and then exit. Anything else on the command line is
@ -505,7 +509,7 @@ used. There is no short form for this option.
<b>--match-limit</b>=<i>number</i> <b>--match-limit</b>=<i>number</i>
Processing some regular expression patterns may take a very long time to search Processing some regular expression patterns may take a very long time to search
for all possible matching strings. Others may require a very large amount of for all possible matching strings. Others may require a very large amount of
memory. There are two options that set resource limits for matching. memory. There are three options that set resource limits for matching.
<br> <br>
<br> <br>
The <b>--match-limit</b> option provides a means of limiting computing resource The <b>--match-limit</b> option provides a means of limiting computing resource
@ -516,13 +520,24 @@ counter that is incremented each time around its main processing loop. If the
value set by <b>--match-limit</b> is reached, an error occurs. value set by <b>--match-limit</b> is reached, an error occurs.
<br> <br>
<br> <br>
The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
of heap memory that may be used for matching. Heap memory is needed only if
matching the pattern requires a significant number of nested backtracking
points to be remembered. This parameter can be set to zero to forbid the use of
heap memory altogether.
<br>
<br>
The <b>--depth-limit</b> option limits the depth of nested backtracking points, The <b>--depth-limit</b> option limits the depth of nested backtracking points,
which in turn limits the amount of memory that is used. This limit is of use which indirectly limits the amount of memory that is used. The amount of memory
only if it is set smaller than <b>--match-limit</b>. needed for each backtracking point depends on the number of capturing
parentheses in the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than <b>--match-limit</b>.
<br> <br>
<br> <br>
There are no short forms for these options. The default settings are specified There are no short forms for these options. The default settings are specified
when the PCRE2 library is compiled, with the default default being 10 million. when the PCRE2 library is compiled, with the default defaults being very large
and so effectively unlimited.
</P> </P>
<P> <P>
<b>--max-buffer-size</b>=<i>number</i> <b>--max-buffer-size</b>=<i>number</i>
@ -764,11 +779,12 @@ Many of the short and long forms of <b>pcre2grep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form as in the GNU <b>grep</b> program. Any long option of the form
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b> <b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>, (PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>, <b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>, <b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and <b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the <b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
<b>--only-matching</b> option with a capturing parentheses number. <b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
capturing parentheses number.
</P> </P>
<P> <P>
Although most of the common options work the same way, a few are different in Although most of the common options work the same way, a few are different in
@ -891,9 +907,9 @@ there are more than 20 such errors, <b>pcre2grep</b> gives up.
</P> </P>
<P> <P>
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
overall resource limit; there is a second option called <b>--depth-limit</b> overall resource limit. There are also other limits that affect the amount of
that sets a limit on the amount of memory that is used (see the discussion of memory used during matching; see the discussion of <b>--heap-limit</b> and
these options above). <b>--depth-limit</b> above.
</P> </P>
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br> <br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
<P> <P>
@ -918,7 +934,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC15" href="#TOC1">REVISION</a><br> <br><a name="SEC15" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 06 April 2017 Last updated: 11 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -170,14 +170,15 @@ the application to apply the JIT optimization by calling
<b>pcre2_jit_compile()</b> is ignored. <b>pcre2_jit_compile()</b> is ignored.
</P> </P>
<br><b> <br><b>
Setting match and backtracking depth limits Setting match resource limits
</b><br> </b><br>
<P> <P>
The pcre2_match() function contains a counter that is incremented every time it The pcre2_match() function contains a counter that is incremented every time it
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
this counter, which therefore limits the amount of computing resource used for this counter, which therefore limits the amount of computing resource used for
a match. The maximum depth of nested backtracking can also be limited, and this a match. The maximum depth of nested backtracking can also be limited; this
restricts the amount of heap memory that is used. indirectly restricts the amount of heap memory that is used, but there is also
an explicit memory limit that can be set.
</P> </P>
<P> <P>
These facilities are provided to catch runaway matches that are provoked by These facilities are provided to catch runaway matches that are provoked by
@ -186,6 +187,7 @@ unlimited repeats applied to a long string that does not match). When one of
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
can also be set by items at the start of the pattern of the form can also be set by items at the start of the pattern of the form
<pre> <pre>
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d) (*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d) (*LIMIT_DEPTH=d)
</pre> </pre>
@ -200,11 +202,13 @@ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility. still recognized for backwards compatibility.
</P> </P>
<P> <P>
The match limit is used (but in a different way) when JIT is being used, but it The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>. for matching. It does not apply to JIT or DFA matching. The match limit is used
However, the depth limit is relevant for DFA matching, which uses function (but in a different way) when JIT is being used, but it is not relevant, and is
recursion for recursions within the pattern. In this case, the depth limit ignored, when matching with <b>pcre2_dfa_match()</b>. The depth limit is ignored
controls the amount of system stack that is used. by JIT but is relevant for DFA matching, which uses function recursion for
recursions within the pattern. In this case, the depth limit controls the
amount of system stack that is used.
<a name="newlines"></a></P> <a name="newlines"></a></P>
<br><b> <br><b>
Newline conventions Newline conventions
@ -3434,7 +3438,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC30" href="#TOC1">REVISION</a><br> <br><a name="SEC30" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 03 April 2017 Last updated: 11 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -83,11 +83,12 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
uses very little system stack at run time. In earlier releases recursive uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems, function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly but this usage has been eliminated. Backtracking positions are now explicitly
remembered in memory frames controlled by the code. An initial 10K vector of remembered in memory frames controlled by the code. An initial 20K vector of
frames is allocated on the system stack (enough for about 50 frames for small frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. Rewriting patterns patterns), but if this is insufficient, heap memory is used. The amount of heap
to be time-efficient, as described below, may also reduce the memory memory can be limited; if the limit is set to zero, only the initial stack
requirements. vector is used. Rewriting patterns to be time-efficient, as described below,
may also reduce the memory requirements.
</P> </P>
<P> <P>
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -243,7 +244,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br> <br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 31 March 2017 Last updated: 08 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -235,6 +235,12 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
compilation, each pattern is passed to the just-in-time compiler, if available. compilation, each pattern is passed to the just-in-time compiler, if available.
</P> </P>
<P> <P>
<b>-jitverify</b>
Behave as if each pattern line has the <b>jitverify</b> modifier; after
successful compilation, each pattern is passed to the just-in-time compiler, if
available, and the use of JIT is verified.
</P>
<P>
<b>-pattern</b> <i>modifier-list</i> <b>-pattern</b> <i>modifier-list</i>
Behave as if each pattern line contains the given modifiers. Behave as if each pattern line contains the given modifiers.
</P> </P>
@ -1088,6 +1094,7 @@ pattern.
get=&#60;number or name&#62; extract captured substring get=&#60;number or name&#62; extract captured substring
getall extract all captured substrings getall extract all captured substrings
/g global global matching /g global global matching
heap_limit=&#60;n&#62; set a limit on heap memory
jitstack=&#60;n&#62; set size of JIT stack jitstack=&#60;n&#62; set size of JIT stack
mark show mark values mark show mark values
match_limit=&#60;n&#62; set a match limit match_limit=&#60;n&#62; set a match limit
@ -1330,11 +1337,11 @@ stack that is larger than the default 32K is necessary only for very
complicated patterns. complicated patterns.
</P> </P>
<br><b> <br><b>
Setting match and depth limits Setting heap, match, and depth limits
</b><br> </b><br>
<P> <P>
The <b>match_limit</b> and <b>depth_limit</b> modifiers set the appropriate The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
limits in the match context. These values are ignored when the the appropriate limits in the match context. These values are ignored when the
<b>find_limits</b> modifier is specified. <b>find_limits</b> modifier is specified.
</P> </P>
<br><b> <br><b>
@ -1343,8 +1350,8 @@ Finding minimum limits
<P> <P>
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b> If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
calls the relevant matching function several times, setting different values in calls the relevant matching function several times, setting different values in
the match context via <b>pcre2_set_match_limit()</b> or the match context via <b>pcre2_set_heap_limit()</b>, <b>pcre2_set_match_limit()</b>,
<b>pcre2_set_depth_limit()</b> until it finds the minimum values for each or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
parameter that allows the match to complete without error. parameter that allows the match to complete without error.
</P> </P>
<P> <P>
@ -1360,8 +1367,8 @@ increasing length of subject string.
</P> </P>
<P> <P>
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
much memory for recording backtracking points is needed to complete the match much nested backtracking happens (that is, how deeply the pattern's tree is
attempt. In the case of DFA matching, <i>depth_limit</i> controls the depth of searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
recursive calls of the internal function that is used for handling pattern recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups. recursion, lookaround assertions, and atomic groups.
</P> </P>
@ -1800,7 +1807,7 @@ Cambridge, England.
</P> </P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br> <br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P> <P>
Last updated: 04 April 2017 Last updated: 11 April 2017
<br> <br>
Copyright &copy; 1997-2017 University of Cambridge. Copyright &copy; 1997-2017 University of Cambridge.
<br> <br>

View File

@ -213,6 +213,9 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td> <tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr> <td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td> <tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr> <td>&nbsp;&nbsp;Set the match limit</td></tr>

View File

@ -283,6 +283,9 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
int pcre2_set_offset_limit(pcre2_match_context *mcontext, int pcre2_set_offset_limit(pcre2_match_context *mcontext,
PCRE2_SIZE value); PCRE2_SIZE value);
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
uint32_t value);
int pcre2_set_match_limit(pcre2_match_context *mcontext, int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value); uint32_t value);
@ -840,6 +843,7 @@ PCRE2 CONTEXTS
Set up a callout function Set up a callout function
Set an offset limit for matching an unanchored pattern Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit Change the backtracking match limit
Change the backtracking depth limit Change the backtracking depth limit
Set custom memory management specifically for the match Set custom memory management specifically for the match
@ -896,14 +900,44 @@ PCRE2 CONTEXTS
also within the offset limit. In other words, whichever limit comes also within the offset limit. In other words, whichever limit comes
first is used. first is used.
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
uint32_t value);
The heap_limit parameter specifies, in units of kilobytes, the maximum
amount of heap memory that pcre2_match() may use to hold backtracking
information when running an interpretive match. This limit does not
apply to matching with the JIT optimization, which has its own memory
control arrangements (see the pcre2jit documentation for more details),
nor does it apply to pcre2_dfa_match(). If the limit is reached, the
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
limit is set when PCRE2 is built; the default default is very large and
is essentially "unlimited".
A value for the heap limit may also be supplied by an item at the start
of a pattern of the form
(*LIMIT_HEAP=ddd)
where ddd is a decimal number. However, such a setting is ignored
unless ddd is less than the limit set by the caller of pcre2_match()
or, if no such limit is set, less than the default.
The pcre2_match() function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking
points there are (that is, the deeper the search tree), the more memory
is needed. Heap memory is used only if the initial vector is too
small. If the heap limit is set to a value less than 21 (in particular,
zero) no heap memory will be used. In this case, only patterns that do
not have a lot of nested backtracking can be successfully processed.
int pcre2_set_match_limit(pcre2_match_context *mcontext, int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value); uint32_t value);
The match_limit parameter provides a means of preventing PCRE2 from The match_limit parameter provides a means of preventing PCRE2 from
using up too many resources when processing patterns that are not going using up too many computing resources when processing patterns that are
to match, but which have a very large number of possibilities in their not going to match, but which have a very large number of possibilities
search trees. The classic example is a pattern that uses nested unlim- in their search trees. The classic example is a pattern that uses
ited repeats. nested unlimited repeats.
There is an internal counter in pcre2_match() that is incremented each There is an internal counter in pcre2_match() that is incremented each
time round its main matching loop. If this value reaches the match time round its main matching loop. If this value reaches the match
@ -938,15 +972,19 @@ PCRE2 CONTEXTS
pcre2_match(). Each time a nested backtracking point is passed, a new pcre2_match(). Each time a nested backtracking point is passed, a new
memory "frame" is used to remember the state of matching at that point. memory "frame" is used to remember the state of matching at that point.
Thus, this parameter indirectly limits the amount of memory that is Thus, this parameter indirectly limits the amount of memory that is
used in a match. used in a match. However, because the size of each memory "frame"
depends on the number of capturing parentheses, the actual memory limit
varies from pattern to pattern. This limit was more useful in versions
before 10.30, where function recursion was used for backtracking.
This limit is not relevant, and is ignored, when matching is done using The depth limit is not relevant, and is ignored, when matching is done
JIT compiled code. However, it is supported by pcre2_dfa_match(), which using JIT compiled code. However, it is supported by pcre2_dfa_match(),
uses it to limit the depth of internal recursive function calls that which uses it to limit the depth of internal recursive function calls
implement lookaround assertions and pattern recursions. This is, there- that implement atomic groups, lookaround assertions, and pattern recur-
fore, an indirect limit on the amount of system stack that is used. A sions. This is, therefore, an indirect limit on the amount of system
recursive pattern such as /(.)(?1)/, when matched to a very long string stack that is used. A recursive pattern such as /(.)(?1)/, when matched
using pcre2_dfa_match(), can use a great deal of stack. to a very long string using pcre2_dfa_match(), can use a great deal of
stack.
The default value for the depth limit can be set when PCRE2 is built; The default value for the depth limit can be set when PCRE2 is built;
the default default is the same value as the default for the match the default default is the same value as the default for the match
@ -999,6 +1037,12 @@ CHECKING BUILD-TIME OPTIONS
recursions and lookarounds in pcre2_dfa_match(). Further details are recursions and lookarounds in pcre2_dfa_match(). Further details are
given with pcre2_set_depth_limit() above. given with pcre2_set_depth_limit() above.
PCRE2_CONFIG_HEAPLIMIT
The output is a uint32_t integer that gives, in kilobytes, the default
limit for the amount of heap memory used by pcre2_match(). Further
details are given with pcre2_set_heap_limit() above.
PCRE2_CONFIG_JIT PCRE2_CONFIG_JIT
The output is a uint32_t integer that is set to one if support for The output is a uint32_t integer that is set to one if support for
@ -1803,6 +1847,14 @@ INFORMATION ABOUT A COMPILED PATTERN
\r or \n or one of the equivalent hexadecimal or octal escape \r or \n or one of the equivalent hexadecimal or octal escape
sequences. sequences.
PCRE2_INFO_HEAPLIMIT
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
ment should point to an unsigned 32-bit integer. If no such value has
been set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET.
PCRE2_INFO_JCHANGED PCRE2_INFO_JCHANGED
Return 1 if the (?J) or (?-J) option setting is used in the pattern, Return 1 if the (?J) or (?-J) option setting is used in the pattern,
@ -2517,7 +2569,9 @@ ERROR RETURNS FROM pcre2_match()
If a pattern contains many nested backtracking points, heap memory is If a pattern contains many nested backtracking points, heap memory is
used to remember them. This error is given when the memory allocation used to remember them. This error is given when the memory allocation
function (default or custom) fails. function (default or custom) fails. Note that a different error,
PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
the heap limit.
PCRE2_ERROR_NULL PCRE2_ERROR_NULL
@ -3187,7 +3241,7 @@ AUTHOR
REVISION REVISION
Last updated: 04 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -3427,19 +3481,40 @@ LIMITING PCRE2 RESOURCE USAGE
pcre2_dfa_match() matching function, but it does also limit JIT match- pcre2_dfa_match() matching function, but it does also limit JIT match-
ing (though the counting is done differently). ing (though the counting is done differently).
In some environments it is desirable to limit the depth of nested back- The pcre2_match() function starts out using a 20K vector on the system
tracking in order to restrict the maximum amount of heap memory that is stack to record backtracking points. The more nested backtracking
used. A second limit controls this; it defaults to the value that is points there are (that is, the deeper the search tree), the more memory
set for --with-match-limit. You can set a lower default limit by is needed. If the initial vector is not large enough, heap memory is
adding, for example, used, up to a certain limit, which is specified in kilobytes. The limit
can be changed at run time, as described in the pcre2api documentation.
The default limit (in effect unlimited) is 20 million. You can change
this by a setting such as
--with-heap-limit=500
which limits the amount of heap to 500 kilobytes. This limit applies
only to interpretive matching in pcre2_match(). It does not apply when
JIT (which has its own memory arrangements) is used, nor does it apply
to pcre2_dfa_match().
You can also explicitly limit the depth of nested backtracking in the
pcre2_match() interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding,
for example,
--with-match-limit-depth=10000 --with-match-limit-depth=10000
to the configure command. This value can also be overridden at run to the configure command. This value can be overridden at run time.
time. As well as applying to pcre2_match(), this limit also controls This depth limit indirectly limits the amount of heap memory that is
the depth of recursive function calls in pcre2_dfa_match(). These are used, but because the size of each backtracking "frame" depends on the
used for lookaround assertions, atomic groups, and recursion within number of capturing parentheses in a pattern, the amount of heap that
patterns. is used before the limit is reached varies from pattern to pattern.
This limit was more useful in versions before 10.30, where function
recursion was used for backtracking. However, as well as applying to
pcre2_match(), this limit also controls the depth of recursive function
calls in pcre2_dfa_match(). These are used for lookaround assertions,
atomic groups, and recursion within patterns. The limit does not apply
to JIT matching.
CREATING CHARACTER TABLES AT BUILD TIME CREATING CHARACTER TABLES AT BUILD TIME
@ -3701,7 +3776,7 @@ AUTHOR
REVISION REVISION
Last updated: 31 March 2017 Last updated: 10 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -5522,14 +5597,15 @@ SPECIAL START-OF-PATTERN ITEMS
attempt by the application to apply the JIT optimization by calling attempt by the application to apply the JIT optimization by calling
pcre2_jit_compile() is ignored. pcre2_jit_compile() is ignored.
Setting match and backtracking depth limits Setting match resource limits
The pcre2_match() function contains a counter that is incremented every The pcre2_match() function contains a counter that is incremented every
time it goes round its main loop. The caller of pcre2_match() can set a time it goes round its main loop. The caller of pcre2_match() can set a
limit on this counter, which therefore limits the amount of computing limit on this counter, which therefore limits the amount of computing
resource used for a match. The maximum depth of nested backtracking can resource used for a match. The maximum depth of nested backtracking can
also be limited, and this restricts the amount of heap memory that is also be limited; this indirectly restricts the amount of heap memory
used. that is used, but there is also an explicit memory limit that can be
set.
These facilities are provided to catch runaway matches that are pro- These facilities are provided to catch runaway matches that are pro-
voked by patterns with huge matching trees (a typical example is a pat- voked by patterns with huge matching trees (a typical example is a pat-
@ -5538,6 +5614,7 @@ SPECIAL START-OF-PATTERN ITEMS
error return. The limits can also be set by items at the start of the error return. The limits can also be set by items at the start of the
pattern of the form pattern of the form
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d) (*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d) (*LIMIT_DEPTH=d)
@ -5551,12 +5628,13 @@ SPECIAL START-OF-PATTERN ITEMS
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
name is still recognized for backwards compatibility. name is still recognized for backwards compatibility.
The match limit is used (but in a different way) when JIT is being The heap limit applies only when the pcre2_match() interpreter is used
used, but it is not relevant, and is ignored, when matching with for matching. It does not apply to JIT or DFA matching. The match limit
pcre2_dfa_match(). However, the depth limit is relevant for DFA match- is used (but in a different way) when JIT is being used, but it is not
ing, which uses function recursion for recursions within the pattern. relevant, and is ignored, when matching with pcre2_dfa_match(). The
In this case, the depth limit controls the amount of system stack that depth limit is ignored by JIT but is relevant for DFA matching, which
is used. uses function recursion for recursions within the pattern. In this
case, the depth limit controls the amount of system stack that is used.
Newline conventions Newline conventions
@ -8480,7 +8558,7 @@ AUTHOR
REVISION REVISION
Last updated: 03 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
@ -8557,10 +8635,12 @@ STACK AND HEAP USAGE AT RUN TIME
sive function calls could use a great deal of stack, and this could sive function calls could use a great deal of stack, and this could
cause problems, but this usage has been eliminated. Backtracking posi- cause problems, but this usage has been eliminated. Backtracking posi-
tions are now explicitly remembered in memory frames controlled by the tions are now explicitly remembered in memory frames controlled by the
code. An initial 10K vector of frames is allocated on the system stack code. An initial 20K vector of frames is allocated on the system stack
(enough for about 50 frames for small patterns), but if this is insuf- (enough for about 100 frames for small patterns), but if this is insuf-
ficient, heap memory is used. Rewriting patterns to be time-efficient, ficient, heap memory is used. The amount of heap memory can be limited;
as described below, may also reduce the memory requirements. if the limit is set to zero, only the initial stack vector is used.
Rewriting patterns to be time-efficient, as described below, may also
reduce the memory requirements.
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
function calls, but only for processing atomic groups, lookaround function calls, but only for processing atomic groups, lookaround
@ -8706,7 +8786,7 @@ AUTHOR
REVISION REVISION
Last updated: 31 March 2017 Last updated: 08 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2_CONFIG 3 "24 March 2017" "PCRE2 10.30" .TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS .SH SYNOPSIS
@ -31,6 +31,7 @@ point to a uint32_t integer variable. The available codes are:
PCRE2_CONFIG_BSR Indicates what \eR matches by default: PCRE2_CONFIG_BSR Indicates what \eR matches by default:
PCRE2_BSR_UNICODE PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
.\" JOIN .\" JOIN
PCRE2_CONFIG_JIT Availability of just-in-time compiler PCRE2_CONFIG_JIT Availability of just-in-time compiler

View File

@ -1,4 +1,4 @@
.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30" .TH PCRE2_MATCH 3 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS .SH SYNOPSIS
@ -32,6 +32,7 @@ A match context is needed only if you want to:
.sp .sp
Set up a callout function Set up a callout function
Set a matching offset limit Set a matching offset limit
Change the heap memory limit
Change the backtracking match limit Change the backtracking match limit
Change the backtracking depth limit Change the backtracking depth limit
Set custom memory management specifically for the match Set custom memory management specifically for the match

View File

@ -1,4 +1,4 @@
.TH PCRE2_PATTERN_INFO 3 "25 March 2017" "PCRE2 10.30" .TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS .SH SYNOPSIS
@ -43,6 +43,9 @@ request are as follows:
.\" JOIN .\" JOIN
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
exist in the pattern exist in the pattern
.\" JOIN
PCRE2_INFO_HEAPLIMIT Heap memory limit if set,
otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0 PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODETYPE Type of must-be-present information PCRE2_INFO_LASTCODETYPE Type of must-be-present information

View File

@ -0,0 +1,28 @@
.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function sets the backtracking heap limit field in a match context. The
result is always zero.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcre2posix\fP
.\"
page.

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30" .TH PCRE2API 3 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.sp .sp
@ -123,6 +123,9 @@ document for an overview of all the PCRE2 documentation.
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP, .B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
.B " PCRE2_SIZE \fIvalue\fP);" .B " PCRE2_SIZE \fIvalue\fP);"
.sp .sp
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.sp
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);" .B " uint32_t \fIvalue\fP);"
.sp .sp
@ -753,6 +756,7 @@ A match context is required if you want to:
.sp .sp
Set up a callout function Set up a callout function
Set an offset limit for matching an unanchored pattern Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit Change the backtracking match limit
Change the backtracking depth limit Change the backtracking depth limit
Set custom memory management specifically for the match Set custom memory management specifically for the match
@ -816,14 +820,49 @@ limit, a match must occur in the first line and also within the offset limit.
In other words, whichever limit comes first is used. In other words, whichever limit comes first is used.
.sp .sp
.nf .nf
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.fi
.sp
The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
information when running an interpretive match. This limit does not apply to
matching with the JIT optimization, which has its own memory control
arrangements (see the
.\" HREF
\fBpcre2jit\fP
.\"
documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
returned. The default limit is set when PCRE2 is built; the default default is
very large and is essentially "unlimited".
.P
A value for the heap limit may also be supplied by an item at the start of a
pattern of the form
.sp
(*LIMIT_HEAP=ddd)
.sp
where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
limit is set, less than the default.
.P
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
.sp
.nf
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP, .B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);" .B " uint32_t \fIvalue\fP);"
.fi .fi
.sp .sp
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
up too many resources when processing patterns that are not going to match, but up too many computing resources when processing patterns that are not going to
which have a very large number of possibilities in their search trees. The match, but which have a very large number of possibilities in their search
classic example is a pattern that uses nested unlimited repeats. trees. The classic example is a pattern that uses nested unlimited repeats.
.P .P
There is an internal counter in \fBpcre2_match()\fP that is incremented each There is an internal counter in \fBpcre2_match()\fP that is incremented each
time round its main matching loop. If this value reaches the match limit, time round its main matching loop. If this value reaches the match limit,
@ -859,15 +898,19 @@ limit is set, less than the default.
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP. This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
Each time a nested backtracking point is passed, a new memory "frame" is used Each time a nested backtracking point is passed, a new memory "frame" is used
to remember the state of matching at that point. Thus, this parameter to remember the state of matching at that point. Thus, this parameter
indirectly limits the amount of memory that is used in a match. indirectly limits the amount of memory that is used in a match. However,
because the size of each memory "frame" depends on the number of capturing
parentheses, the actual memory limit varies from pattern to pattern. This limit
was more useful in versions before 10.30, where function recursion was used for
backtracking.
.P .P
This limit is not relevant, and is ignored, when matching is done using JIT The depth limit is not relevant, and is ignored, when matching is done using
compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which uses JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
it to limit the depth of internal recursive function calls that implement uses it to limit the depth of internal recursive function calls that implement
lookaround assertions and pattern recursions. This is, therefore, an indirect atomic groups, lookaround assertions, and pattern recursions. This is,
limit on the amount of system stack that is used. A recursive pattern such as therefore, an indirect limit on the amount of system stack that is used. A
/(.)(?1)/, when matched to a very long string using \fBpcre2_dfa_match()\fP, recursive pattern such as /(.)(?1)/, when matched to a very long string using
can use a great deal of stack. \fBpcre2_dfa_match()\fP, can use a great deal of stack.
.P .P
The default value for the depth limit can be set when PCRE2 is built; the The default value for the depth limit can be set when PCRE2 is built; the
default default is the same value as the default for the match limit. If the default default is the same value as the default for the match limit. If the
@ -921,6 +964,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
\fBpcre2_set_depth_limit()\fP above. \fBpcre2_set_depth_limit()\fP above.
.sp
PCRE2_CONFIG_HEAPLIMIT
.sp
The output is a uint32_t integer that gives, in kilobytes, the default limit
for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
given with \fBpcre2_set_heap_limit()\fP above.
.sp .sp
PCRE2_CONFIG_JIT PCRE2_CONFIG_JIT
.sp .sp
@ -1784,6 +1833,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An
explicit match is either a literal CR or LF character, or \er or \en or one of explicit match is either a literal CR or LF character, or \er or \en or one of
the equivalent hexadecimal or octal escape sequences. the equivalent hexadecimal or octal escape sequences.
.sp
PCRE2_INFO_HEAPLIMIT
.sp
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
should point to an unsigned 32-bit integer. If no such value has been set, the
call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET.
.sp .sp
PCRE2_INFO_JCHANGED PCRE2_INFO_JCHANGED
.sp .sp
@ -2603,7 +2659,8 @@ The backtracking match limit was reached.
.sp .sp
If a pattern contains many nested backtracking points, heap memory is used to If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default remember them. This error is given when the memory allocation function (default
or custom) fails. or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
if the amount of memory needed exceeds the heap limit.
.sp .sp
PCRE2_ERROR_NULL PCRE2_ERROR_NULL
.sp .sp
@ -3322,6 +3379,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 04 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -1,4 +1,4 @@
.TH PCRE2BUILD 3 "31 March 2017" "PCRE2 10.30" .TH PCRE2BUILD 3 "10 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
. .
@ -260,17 +260,42 @@ to the \fBconfigure\fP command. This setting has no effect on the
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching \fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
(though the counting is done differently). (though the counting is done differently).
.P .P
In some environments it is desirable to limit the depth of nested backtracking The \fBpcre2_match()\fP function starts out using a 20K vector on the system
in order to restrict the maximum amount of heap memory that is used. A second stack to record backtracking points. The more nested backtracking points there
limit controls this; it defaults to the value that is set for are (that is, the deeper the search tree), the more memory is needed. If the
--with-match-limit. You can set a lower default limit by adding, for example, initial vector is not large enough, heap memory is used, up to a certain limit,
which is specified in kilobytes. The limit can be changed at run time, as
described in the
.\" HREF
\fBpcre2api\fP
.\"
documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as
.sp
--with-heap-limit=500
.sp
which limits the amount of heap to 500 kilobytes. This limit applies only to
interpretive matching in \fBpcre2_match()\fP. It does not apply when JIT (which has
its own memory arrangements) is used, nor does it apply to
\fBpcre2_dfa_match()\fP.
.P
You can also explicitly limit the depth of nested backtracking in the
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding, for
example,
.sp .sp
--with-match-limit_depth=10000 --with-match-limit_depth=10000
.sp .sp
to the \fBconfigure\fP command. This value can also be overridden at run time. to the \fBconfigure\fP command. This value can be overridden at run time. This
As well as applying to \fBpcre2_match()\fP, this limit also controls the depth depth limit indirectly limits the amount of heap memory that is used, but
of recursive function calls in \fBpcre2_dfa_match()\fP. These are used for because the size of each backtracking "frame" depends on the number of
lookaround assertions, atomic groups, and recursion within patterns. capturing parentheses in a pattern, the amount of heap that is used before the
limit is reached varies from pattern to pattern. This limit was more useful in
versions before 10.30, where function recursion was used for backtracking.
However, as well as applying to \fBpcre2_match()\fP, this limit also controls
the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are
used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
. .
. .
.SH "CREATING CHARACTER TABLES AT BUILD TIME" .SH "CREATING CHARACTER TABLES AT BUILD TIME"
@ -547,6 +572,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 31 March 2017 Last updated: 10 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30" .TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
pcre2grep - a grep with Perl-compatible regular expressions. pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -347,6 +347,9 @@ file names are shown when multiple files are searched. For matching lines, the
file name is followed by a colon; for context lines, a hyphen separator is used. file name is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name. If a line number is also being output, it follows the file name.
.TP .TP
\fB--heap-limit\fP=\fInumber\fP
See \fB--match-limit\fP below.
.TP
\fB--help\fP \fB--help\fP
Output a help message, giving brief details of the command options and file Output a help message, giving brief details of the command options and file
type support, and then exit. Anything else on the command line is type support, and then exit. Anything else on the command line is
@ -436,7 +439,7 @@ used. There is no short form for this option.
\fB--match-limit\fP=\fInumber\fP \fB--match-limit\fP=\fInumber\fP
Processing some regular expression patterns may take a very long time to search Processing some regular expression patterns may take a very long time to search
for all possible matching strings. Others may require a very large amount of for all possible matching strings. Others may require a very large amount of
memory. There are two options that set resource limits for matching. memory. There are three options that set resource limits for matching.
.sp .sp
The \fB--match-limit\fP option provides a means of limiting computing resource The \fB--match-limit\fP option provides a means of limiting computing resource
usage when processing patterns that are not going to match, but which have a usage when processing patterns that are not going to match, but which have a
@ -445,12 +448,22 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
counter that is incremented each time around its main processing loop. If the counter that is incremented each time around its main processing loop. If the
value set by \fB--match-limit\fP is reached, an error occurs. value set by \fB--match-limit\fP is reached, an error occurs.
.sp .sp
The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
of heap memory that may be used for matching. Heap memory is needed only if
matching the pattern requires a significant number of nested backtracking
points to be remembered. This parameter can be set to zero to forbid the use of
heap memory altogether.
.sp
The \fB--depth-limit\fP option limits the depth of nested backtracking points, The \fB--depth-limit\fP option limits the depth of nested backtracking points,
which in turn limits the amount of memory that is used. This limit is of use which indirectly limits the amount of memory that is used. The amount of memory
only if it is set smaller than \fB--match-limit\fP. needed for each backtracking point depends on the number of capturing
parentheses in the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than \fB--match-limit\fP.
.sp .sp
There are no short forms for these options. The default settings are specified There are no short forms for these options. The default settings are specified
when the PCRE2 library is compiled, with the default default being 10 million. when the PCRE2 library is compiled, with the default defaults being very large
and so effectively unlimited.
.TP .TP
\fB--max-buffer-size=\fInumber\fP \fB--max-buffer-size=\fInumber\fP
This limits the expansion of the processing buffer, whose initial size can be This limits the expansion of the processing buffer, whose initial size can be
@ -670,11 +683,12 @@ Many of the short and long forms of \fBpcre2grep\fP's options are the same
as in the GNU \fBgrep\fP program. Any long option of the form as in the GNU \fBgrep\fP program. Any long option of the form
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP \fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP, (PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP, \fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP, \fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and \fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the \fB--output\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
\fB--only-matching\fP option with a capturing parentheses number. \fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
capturing parentheses number.
.P .P
Although most of the common options work the same way, a few are different in Although most of the common options work the same way, a few are different in
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob \fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
@ -799,9 +813,9 @@ message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, \fBpcre2grep\fP gives up. there are more than 20 such errors, \fBpcre2grep\fP gives up.
.P .P
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
overall resource limit; there is a second option called \fB--depth-limit\fP overall resource limit. There are also other limits that affect the amount of
that sets a limit on the amount of memory that is used (see the discussion of memory used during matching; see the discussion of \fB--heap-limit\fP and
these options above). \fB--depth-limit\fP above.
. .
. .
.SH DIAGNOSTICS .SH DIAGNOSTICS
@ -834,6 +848,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 06 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -383,6 +383,9 @@ OPTIONS
colon; for context lines, a hyphen separator is used. If a colon; for context lines, a hyphen separator is used. If a
line number is also being output, it follows the file name. line number is also being output, it follows the file name.
--heap-limit=number
See --match-limit below.
--help Output a help message, giving brief details of the command --help Output a help message, giving brief details of the command
options and file type support, and then exit. Anything else options and file type support, and then exit. Anything else
on the command line is ignored. on the command line is ignored.
@ -482,7 +485,7 @@ OPTIONS
--match-limit=number --match-limit=number
Processing some regular expression patterns may take a very Processing some regular expression patterns may take a very
long time to search for all possible matching strings. Others long time to search for all possible matching strings. Others
may require a very large amount of memory. There are two may require a very large amount of memory. There are three
options that set resource limits for matching. options that set resource limits for matching.
The --match-limit option provides a means of limiting comput- The --match-limit option provides a means of limiting comput-
@ -494,14 +497,25 @@ OPTIONS
processing loop. If the value set by --match-limit is processing loop. If the value set by --match-limit is
reached, an error occurs. reached, an error occurs.
The --heap-limit option specifies, as a number of kilobytes,
the amount of heap memory that may be used for matching. Heap
memory is needed only if matching the pattern requires a sig-
nificant number of nested backtracking points to be remem-
bered. This parameter can be set to zero to forbid the use of
heap memory altogether.
The --depth-limit option limits the depth of nested back- The --depth-limit option limits the depth of nested back-
tracking points, which in turn limits the amount of memory tracking points, which indirectly limits the amount of memory
that is used. This limit is of use only if it is set smaller that is used. The amount of memory needed for each backtrack-
than --match-limit. ing point depends on the number of capturing parentheses in
the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of
use only if it is set smaller than --match-limit.
There are no short forms for these options. The default set- There are no short forms for these options. The default set-
tings are specified when the PCRE2 library is compiled, with tings are specified when the PCRE2 library is compiled, with
the default default being 10 million. the default defaults being very large and so effectively
unlimited.
--max-buffer-size=number --max-buffer-size=number
This limits the expansion of the processing buffer, whose This limits the expansion of the processing buffer, whose
@ -748,11 +762,11 @@ OPTIONS COMPATIBILITY
Many of the short and long forms of pcre2grep's options are the same as Many of the short and long forms of pcre2grep's options are the same as
in the GNU grep program. Any long option of the form --xxx-regexp (GNU in the GNU grep program. Any long option of the form --xxx-regexp (GNU
terminology) is also available as --xxx-regex (PCRE2 terminology). How- terminology) is also available as --xxx-regex (PCRE2 terminology). How-
ever, the --depth-limit, --file-list, --file-offsets, --include-dir, ever, the --depth-limit, --file-list, --file-offsets, --heap-limit,
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new- --include-dir, --line-offsets, --locale, --match-limit, -M, --multi-
line, --om-separator, --output, -u, and --utf-8 options are specific to line, -N, --newline, --om-separator, --output, -u, and --utf-8 options
pcre2grep, as is the use of the --only-matching option with a capturing are specific to pcre2grep, as is the use of the --only-matching option
parentheses number. with a capturing parentheses number.
Although most of the common options work the same way, a few are dif- Although most of the common options work the same way, a few are dif-
ferent in pcre2grep. For example, the --include option's argument is a ferent in pcre2grep. For example, the --include option's argument is a
@ -873,9 +887,9 @@ MATCHING ERRORS
such errors, pcre2grep gives up. such errors, pcre2grep gives up.
The --match-limit option of pcre2grep can be used to set the overall The --match-limit option of pcre2grep can be used to set the overall
resource limit; there is a second option called --depth-limit that sets resource limit. There are also other limits that affect the amount of
a limit on the amount of memory that is used (see the discussion of memory used during matching; see the discussion of --heap-limit and
these options above). --depth-limit above.
DIAGNOSTICS DIAGNOSTICS
@ -901,5 +915,5 @@ AUTHOR
REVISION REVISION
Last updated: 06 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.

View File

@ -1,4 +1,4 @@
.TH PCRE2PATTERN 3 "03 April 2017" "PCRE2 10.30" .TH PCRE2PATTERN 3 "11 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION DETAILS" .SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -138,14 +138,15 @@ the application to apply the JIT optimization by calling
\fBpcre2_jit_compile()\fP is ignored. \fBpcre2_jit_compile()\fP is ignored.
. .
. .
.SS "Setting match and backtracking depth limits" .SS "Setting match resource limits"
.rs .rs
.sp .sp
The pcre2_match() function contains a counter that is incremented every time it The pcre2_match() function contains a counter that is incremented every time it
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
this counter, which therefore limits the amount of computing resource used for this counter, which therefore limits the amount of computing resource used for
a match. The maximum depth of nested backtracking can also be limited, and this a match. The maximum depth of nested backtracking can also be limited; this
restricts the amount of heap memory that is used. indirectly restricts the amount of heap memory that is used, but there is also
an explicit memory limit that can be set.
.P .P
These facilities are provided to catch runaway matches that are provoked by These facilities are provided to catch runaway matches that are provoked by
patterns with huge matching trees (a typical example is a pattern with nested patterns with huge matching trees (a typical example is a pattern with nested
@ -153,6 +154,7 @@ unlimited repeats applied to a long string that does not match). When one of
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
can also be set by items at the start of the pattern of the form can also be set by items at the start of the pattern of the form
.sp .sp
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d) (*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d) (*LIMIT_DEPTH=d)
.sp .sp
@ -165,11 +167,13 @@ setting of one of these limits, the lower value is used.
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility. still recognized for backwards compatibility.
.P .P
The match limit is used (but in a different way) when JIT is being used, but it The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP. for matching. It does not apply to JIT or DFA matching. The match limit is used
However, the depth limit is relevant for DFA matching, which uses function (but in a different way) when JIT is being used, but it is not relevant, and is
recursion for recursions within the pattern. In this case, the depth limit ignored, when matching with \fBpcre2_dfa_match()\fP. The depth limit is ignored
controls the amount of system stack that is used. by JIT but is relevant for DFA matching, which uses function recursion for
recursions within the pattern. In this case, the depth limit controls the
amount of system stack that is used.
. .
. .
.\" HTML <a name="newlines"></a> .\" HTML <a name="newlines"></a>
@ -3465,6 +3469,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 03 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -1,4 +1,4 @@
.TH PCRE2PERFORM 3 "31 March 2017" "PCRE2 10.30" .TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
.SH NAME .SH NAME
PCRE2 - Perl-compatible regular expressions (revised API) PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 PERFORMANCE" .SH "PCRE2 PERFORMANCE"
@ -69,11 +69,12 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
uses very little system stack at run time. In earlier releases recursive uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems, function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly but this usage has been eliminated. Backtracking positions are now explicitly
remembered in memory frames controlled by the code. An initial 10K vector of remembered in memory frames controlled by the code. An initial 20K vector of
frames is allocated on the system stack (enough for about 50 frames for small frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. Rewriting patterns patterns), but if this is insufficient, heap memory is used. The amount of heap
to be time-efficient, as described below, may also reduce the memory memory can be limited; if the limit is set to zero, only the initial stack
requirements. vector is used. Rewriting patterns to be time-efficient, as described below,
may also reduce the memory requirements.
.P .P
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
function calls, but only for processing atomic groups, lookaround assertions, function calls, but only for processing atomic groups, lookaround assertions,
@ -231,6 +232,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 31 March 2017 Last updated: 08 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "08 April 2017" "PCRE 10.30" .TH PCRE2TEST 1 "11 April 2017" "PCRE 10.30"
.SH NAME .SH NAME
pcre2test - a program for testing Perl-compatible regular expressions. pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS .SH SYNOPSIS
@ -1063,6 +1063,7 @@ pattern.
get=<number or name> extract captured substring get=<number or name> extract captured substring
getall extract all captured substrings getall extract all captured substrings
/g global global matching /g global global matching
heap_limit=<n> set a limit on heap memory
jitstack=<n> set size of JIT stack jitstack=<n> set size of JIT stack
mark show mark values mark show mark values
match_limit=<n> set a match limit match_limit=<n> set a match limit
@ -1293,11 +1294,11 @@ stack that is larger than the default 32K is necessary only for very
complicated patterns. complicated patterns.
. .
. .
.SS "Setting match and depth limits" .SS "Setting heap, match, and depth limits"
.rs .rs
.sp .sp
The \fBmatch_limit\fP and \fBdepth_limit\fP modifiers set the appropriate The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
limits in the match context. These values are ignored when the the appropriate limits in the match context. These values are ignored when the
\fBfind_limits\fP modifier is specified. \fBfind_limits\fP modifier is specified.
. .
. .
@ -1306,8 +1307,8 @@ limits in the match context. These values are ignored when the
.sp .sp
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
calls the relevant matching function several times, setting different values in calls the relevant matching function several times, setting different values in
the match context via \fBpcre2_set_match_limit()\fP or the match context via \fBpcre2_set_heap_limit()\fP, \fBpcre2_set_match_limit()\fP,
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
parameter that allows the match to complete without error. parameter that allows the match to complete without error.
.P .P
If JIT is being used, only the match limit is relevant. If DFA matching is If JIT is being used, only the match limit is relevant. If DFA matching is
@ -1320,8 +1321,8 @@ numbers of matching possibilities, it can become large very quickly with
increasing length of subject string. increasing length of subject string.
.P .P
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
much memory for recording backtracking points is needed to complete the match much nested backtracking happens (that is, how deeply the pattern's tree is
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
recursive calls of the internal function that is used for handling pattern recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups. recursion, lookaround assertions, and atomic groups.
. .
@ -1782,6 +1783,6 @@ Cambridge, England.
.rs .rs
.sp .sp
.nf .nf
Last updated: 08 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.
.fi .fi

View File

@ -185,6 +185,12 @@ COMMAND LINE OPTIONS
successful compilation, each pattern is passed to the just- successful compilation, each pattern is passed to the just-
in-time compiler, if available. in-time compiler, if available.
-jitverify
Behave as if each pattern line has the jitverify modifier;
after successful compilation, each pattern is passed to the
just-in-time compiler, if available, and the use of JIT is
verified.
-pattern modifier-list -pattern modifier-list
Behave as if each pattern line contains the given modifiers. Behave as if each pattern line contains the given modifiers.
@ -972,6 +978,7 @@ SUBJECT MODIFIERS
get=<number or name> extract captured substring get=<number or name> extract captured substring
getall extract all captured substrings getall extract all captured substrings
/g global global matching /g global global matching
heap_limit=<n> set a limit on heap memory
jitstack=<n> set size of JIT stack jitstack=<n> set size of JIT stack
mark show mark values mark show mark values
match_limit=<n> set a match limit match_limit=<n> set a match limit
@ -1196,19 +1203,20 @@ SUBJECT MODIFIERS
Providing a stack that is larger than the default 32K is necessary only Providing a stack that is larger than the default 32K is necessary only
for very complicated patterns. for very complicated patterns.
Setting match and depth limits Setting heap, match, and depth limits
The match_limit and depth_limit modifiers set the appropriate limits in The heap_limit, match_limit, and depth_limit modifiers set the appro-
the match context. These values are ignored when the find_limits modi- priate limits in the match context. These values are ignored when the
fier is specified. find_limits modifier is specified.
Finding minimum limits Finding minimum limits
If the find_limits modifier is present on a subject line, pcre2test If the find_limits modifier is present on a subject line, pcre2test
calls the relevant matching function several times, setting different calls the relevant matching function several times, setting different
values in the match context via pcre2_set_match_limit() or values in the match context via pcre2_set_heap_limit(),
pcre2_set_depth_limit() until it finds the minimum values for each pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
parameter that allows the match to complete without error. minimum values for each parameter that allows the match to complete
without error.
If JIT is being used, only the match limit is relevant. If DFA matching If JIT is being used, only the match limit is relevant. If DFA matching
is being used, only the depth limit is relevant. is being used, only the depth limit is relevant.
@ -1220,8 +1228,8 @@ SUBJECT MODIFIERS
quickly with increasing length of subject string. quickly with increasing length of subject string.
For non-DFA matching, the minimum depth_limit number is a measure of For non-DFA matching, the minimum depth_limit number is a measure of
how much memory for recording backtracking points is needed to complete how much nested backtracking happens (that is, how deeply the pattern's
the match attempt. In the case of DFA matching, depth_limit controls tree is searched). In the case of DFA matching, depth_limit controls
the depth of recursive calls of the internal function that is used for the depth of recursive calls of the internal function that is used for
handling pattern recursion, lookaround assertions, and atomic groups. handling pattern recursion, lookaround assertions, and atomic groups.
@ -1632,5 +1640,5 @@ AUTHOR
REVISION REVISION
Last updated: 04 April 2017 Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge. Copyright (c) 1997-2017 University of Cambridge.

View File

@ -132,6 +132,10 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */ /* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H #undef HAVE_ZLIB_H
/* This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes. */
#undef HEAP_LIMIT
/* The value of LINK_SIZE determines the number of bytes used to store links /* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases. compiled patterns up to 64K long. This covers the vast majority of cases.
@ -143,7 +147,7 @@ sure both macros are undefined; an emulation function will then be used. */
#undef LT_OBJDIR #undef LT_OBJDIR
/* The value of MATCH_LIMIT determines the default number of times the /* The value of MATCH_LIMIT determines the default number of times the
internal match() function can record a backtrack position during a single pcre2_match() function can record a backtrack position during a single
matching attempt. There is a runtime interface for setting a different matching attempt. There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very take for ever to determine that they do not match. The default is set very

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADSUBSPATTERN (-60) #define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61) #define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) #define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -297,6 +298,7 @@ numbers must not be changed. */
#define PCRE2_INFO_SIZE 22 #define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */
@ -313,6 +315,7 @@ numbers must not be changed. */
#define PCRE2_CONFIG_UNICODE 9 #define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10 #define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11 #define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */ /* Types for code units in patterns and subject strings. */
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
int (*)(pcre2_callout_block *, void *), void *); \ int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADSUBSPATTERN (-60) #define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61) #define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62) #define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
/* Request types for pcre2_pattern_info() */ /* Request types for pcre2_pattern_info() */
@ -297,6 +298,7 @@ numbers must not be changed. */
#define PCRE2_INFO_SIZE 22 #define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23 #define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24 #define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */ /* Request types for pcre2_config(). */
@ -313,6 +315,7 @@ numbers must not be changed. */
#define PCRE2_CONFIG_UNICODE 9 #define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10 #define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11 #define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */ /* Types for code units in patterns and subject strings. */
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
int (*)(pcre2_callout_block *, void *), void *); \ int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \ pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \ pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_) #define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_) #define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_) #define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_) #define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_) #define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_) #define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)

View File

@ -727,6 +727,7 @@ enum { PSO_OPT, /* Value is an option bit */
PSO_FLG, /* Value is a flag bit */ PSO_FLG, /* Value is a flag bit */
PSO_NL, /* Value is a newline type */ PSO_NL, /* Value is a newline type */
PSO_BSR, /* Value is a \R type */ PSO_BSR, /* Value is a \R type */
PSO_LIMH, /* Read integer value for heap limit */
PSO_LIMM, /* Read integer value for match limit */ PSO_LIMM, /* Read integer value for match limit */
PSO_LIMD }; /* Read integer value for depth limit */ PSO_LIMD }; /* Read integer value for depth limit */
@ -749,6 +750,7 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR }, { (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT }, { (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE }, { (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 }, { (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 }, { (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 }, { (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
@ -8853,6 +8855,7 @@ uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */ uint32_t setflags = 0; /* NL and BSR set flags */
uint32_t skipatstart; /* When checking (*UTF) etc */ uint32_t skipatstart; /* When checking (*UTF) etc */
uint32_t limit_heap = UINT32_MAX;
uint32_t limit_match = UINT32_MAX; /* Unset match limits */ uint32_t limit_match = UINT32_MAX; /* Unset match limits */
uint32_t limit_depth = UINT32_MAX; uint32_t limit_depth = UINT32_MAX;
@ -9026,6 +9029,7 @@ while (patlen - skipatstart >= 2 &&
case PSO_LIMM: case PSO_LIMM:
case PSO_LIMD: case PSO_LIMD:
case PSO_LIMH:
c = 0; c = 0;
pp = skipatstart; pp = skipatstart;
if (!IS_DIGIT(ptr[pp])) if (!IS_DIGIT(ptr[pp]))
@ -9045,7 +9049,8 @@ while (patlen - skipatstart >= 2 &&
ptr += pp; ptr += pp;
goto HAD_EARLY_ERROR; goto HAD_EARLY_ERROR;
} }
if (p->type == PSO_LIMM) limit_match = c; if (p->type == PSO_LIMH) limit_heap = c;
else if (p->type == PSO_LIMM) limit_match = c;
else limit_depth = c; else limit_depth = c;
skipatstart += pp - skipatstart; skipatstart += pp - skipatstart;
break; break;
@ -9288,6 +9293,7 @@ re->magic_number = MAGIC_NUMBER;
re->compile_options = options; re->compile_options = options;
re->overall_options = cb.external_options; re->overall_options = cb.external_options;
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags; re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
re->limit_heap = limit_heap;
re->limit_match = limit_match; re->limit_match = limit_match;
re->limit_depth = limit_depth; re->limit_depth = limit_depth;
re->first_codeunit = 0; re->first_codeunit = 0;

View File

@ -84,6 +84,7 @@ if (where == NULL) /* Requests a length */
return PCRE2_ERROR_BADOPTION; return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR: case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_HEAPLIMIT:
case PCRE2_CONFIG_JIT: case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE: case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT: case PCRE2_CONFIG_MATCHLIMIT:
@ -116,6 +117,10 @@ switch (what)
#endif #endif
break; break;
case PCRE2_CONFIG_HEAPLIMIT:
*((uint32_t *)where) = HEAP_LIMIT;
break;
case PCRE2_CONFIG_JIT: case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT #ifdef SUPPORT_JIT
*((uint32_t *)where) = 1; *((uint32_t *)where) = 1;

View File

@ -168,6 +168,7 @@ const pcre2_match_context PRIV(default_match_context) = {
NULL, NULL,
NULL, NULL,
PCRE2_UNSET, /* Offset limit */ PCRE2_UNSET, /* Offset limit */
HEAP_LIMIT,
MATCH_LIMIT, MATCH_LIMIT,
MATCH_LIMIT_DEPTH }; MATCH_LIMIT_DEPTH };
@ -346,6 +347,13 @@ mcontext->callout_data = callout_data;
return 0; return 0;
} }
/* Set the heap limit in a match context. The value is stored unchanged in
mcontext->heap_limit; the matching code compares it against byte sizes divided
by 1024, so it is a number of kilobytes. No validation is done on either
argument, and the function always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->heap_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit) pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{ {

View File

@ -256,6 +256,7 @@ static const unsigned char match_error_texts[] =
"match with end before start is not supported\0" "match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0" "too many replacements (more than INT_MAX)\0"
"bad serialized data\0" "bad serialized data\0"
"heap limit exceeded\0"
; ;

View File

@ -240,6 +240,16 @@ not rely on this. */
#define COMPILE_ERROR_BASE 100 #define COMPILE_ERROR_BASE 100
/* The initial frames vector for remembering backtracking points in
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20K handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */
#define START_FRAMES_SIZE 20480
/* Define the default BSR convention. */ /* Define the default BSR convention. */
#ifdef BSR_ANYCRLF #ifdef BSR_ANYCRLF
@ -922,6 +932,7 @@ a positive value. */
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)" #define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)" #define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)" #define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH=" #define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH=" #define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION=" #define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
@ -1196,6 +1207,7 @@ only. */
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS #define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS #define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN #define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN #define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN #define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN

View File

@ -585,6 +585,7 @@ typedef struct pcre2_real_match_context {
int (*callout)(pcre2_callout_block *, void *); int (*callout)(pcre2_callout_block *, void *);
void *callout_data; void *callout_data;
PCRE2_SIZE offset_limit; PCRE2_SIZE offset_limit;
uint32_t heap_limit;
uint32_t match_limit; uint32_t match_limit;
uint32_t depth_limit; uint32_t depth_limit;
} pcre2_real_match_context; } pcre2_real_match_context;
@ -614,6 +615,7 @@ typedef struct pcre2_real_code {
uint32_t compile_options; /* Options passed to pcre2_compile() */ uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */ uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */ uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */ uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_depth; /* Limit set in the pattern */ uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */ uint32_t first_codeunit; /* Starting code unit */
@ -808,9 +810,10 @@ typedef struct match_block {
heapframe *match_frames; /* Points to vector of frames */ heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */ heapframe *match_frames_top; /* Points after the end of the vector */
heapframe *stack_frames; /* The original vector on the stack */ heapframe *stack_frames; /* The original vector on the stack */
uint32_t match_call_count; /* Number of times a new frame is created */ PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */ uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */ uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */ BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */ BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */ const uint8_t *lcc; /* Points to lower casing table */

View File

@ -64,15 +64,6 @@ information, and fields within it. */
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */ #define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
/* The initial frames vector for remembering backtracking points is allocated
on the system stack, of this size (bytes). The size must be a multiple of
sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best.
Typical frame sizes are a few hundred bytes (it depends on the number of
capturing parentheses) so 10K handles quite a few frames. A larger vector on
the heap is obtained for patterns that need more frames. */
#define START_FRAMES_SIZE 10240
/* Masks for identifying the public options that are permitted at match time. */ /* Masks for identifying the public options that are permitted at match time. */
#define PUBLIC_MATCH_OPTIONS \ #define PUBLIC_MATCH_OPTIONS \
@ -618,14 +609,22 @@ backtracking point. */
MATCH_RECURSE: MATCH_RECURSE:
/* Set up a new backtracking frame. If the vector is full, get a new one /* Set up a new backtracking frame. If the vector is full, get a new one
on the heap, doubling the size. */ on the heap, doubling the size, but constrained by the heap limit. */
N = (heapframe *)((char *)F + frame_size); N = (heapframe *)((char *)F + frame_size);
if (N >= mb->match_frames_top) if (N >= mb->match_frames_top)
{ {
PCRE2_SIZE newsize = mb->frame_vector_size * 2; PCRE2_SIZE newsize = mb->frame_vector_size * 2;
heapframe *new = mb->memctl.malloc(newsize, mb->memctl.memory_data); heapframe *new;
/* The doubled size would exceed the heap limit (which is in kilobytes), so
fall back to the largest whole number of frames that fits within the limit. If
the current vector is already at least that big, give up with a heap limit
error. The comparison must be ">=" and not "==": when the limit is smaller than
the current vector (for example, a limit of zero while the initial stack vector
is in use), maxsize is less than mb->frame_vector_size, and letting newsize
shrink would make the copy of the existing frames overrun the new block. */
if ((newsize / 1024) > mb->heap_limit)
{
PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
newsize = maxsize;
}
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY; if (new == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(new, mb->match_frames, mb->frame_vector_size); memcpy(new, mb->match_frames, mb->frame_vector_size);
@ -6266,9 +6265,22 @@ correct when calling match() more than once for non-anchored patterns. */
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE)); frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
/* Limits set in the pattern override the match context only if they are
smaller. */
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
mcontext->heap_limit : re->limit_heap;
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* If a pattern has very many capturing parentheses, the frame size may be very /* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary. */ vector on the heap if necessary, except when the heap limit prevents this. Get
fewer if possible. (The heap limit is in kilobytes.) */
if (frame_size <= START_FRAMES_SIZE/10) if (frame_size <= START_FRAMES_SIZE/10)
{ {
@ -6278,6 +6290,11 @@ if (frame_size <= START_FRAMES_SIZE/10)
else else
{ {
mb->frame_vector_size = frame_size * 10; mb->frame_vector_size = frame_size * 10;
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
{
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
}
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size, mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
mb->memctl.memory_data); mb->memctl.memory_data);
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY; if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
@ -6292,14 +6309,6 @@ to avoid uninitialized memory read errors when it is copied to a new frame. */
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff, memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
re->top_bracket * 2 * sizeof(PCRE2_SIZE)); re->top_bracket * 2 * sizeof(PCRE2_SIZE));
/* Limits set in the pattern override the match context only if they are
smaller. */
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* Pointers to the individual character tables */ /* Pointers to the individual character tables */
mb->lcc = re->tables + lcc_offset; mb->lcc = re->tables + lcc_offset;

View File

@ -80,6 +80,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_FIRSTCODEUNIT: case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC: case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF: case PCRE2_INFO_HASCRORLF:
case PCRE2_INFO_HEAPLIMIT:
case PCRE2_INFO_JCHANGED: case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE: case PCRE2_INFO_LASTCODETYPE:
case PCRE2_INFO_LASTCODEUNIT: case PCRE2_INFO_LASTCODEUNIT:
@ -171,6 +172,11 @@ switch(what)
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0; *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break; break;
case PCRE2_INFO_HEAPLIMIT:
*((uint32_t *)where) = re->limit_heap;
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_JCHANGED: case PCRE2_INFO_JCHANGED:
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0; *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
break; break;

View File

@ -212,6 +212,7 @@ static const uint8_t *character_tables = NULL;
static uint32_t pcre2_options = 0; static uint32_t pcre2_options = 0;
static uint32_t process_options = 0; static uint32_t process_options = 0;
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
static uint32_t match_limit = 0; static uint32_t match_limit = 0;
static uint32_t depth_limit = 0; static uint32_t depth_limit = 0;
@ -330,7 +331,7 @@ static const char *incexname[4] = { "--include", "--exclude",
/* Structure for options and list of them */ /* Structure for options and list of them */
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES }; OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
typedef struct option_item { typedef struct option_item {
@ -356,16 +357,17 @@ used to identify them. */
#define N_LOFFSETS (-10) #define N_LOFFSETS (-10)
#define N_FOFFSETS (-11) #define N_FOFFSETS (-11)
#define N_LBUFFER (-12) #define N_LBUFFER (-12)
#define N_M_LIMIT (-13) #define N_H_LIMIT (-13)
#define N_M_LIMIT_DEP (-14) #define N_M_LIMIT (-14)
#define N_BUFSIZE (-15) #define N_M_LIMIT_DEP (-15)
#define N_NOJIT (-16) #define N_BUFSIZE (-16)
#define N_FILE_LIST (-17) #define N_NOJIT (-17)
#define N_BINARY_FILES (-18) #define N_FILE_LIST (-18)
#define N_EXCLUDE_FROM (-19) #define N_BINARY_FILES (-19)
#define N_INCLUDE_FROM (-20) #define N_EXCLUDE_FROM (-20)
#define N_OM_SEPARATOR (-21) #define N_INCLUDE_FROM (-21)
#define N_MAX_BUFSIZE (-22) #define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE (-23)
static option_item optionlist[] = { static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" }, { OP_NODATA, N_NULL, NULL, "", "terminate options" },
@ -397,6 +399,7 @@ static option_item optionlist[] = {
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" }, { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" }, { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" }, { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
@ -525,9 +528,9 @@ pcre2grep_exit(int rc)
{ {
if (resource_error) if (resource_error)
{ {
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit " fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT, "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
PCRE2_ERROR_DEPTHLIMIT); PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n"); fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
} }
exit(rc); exit(rc);
@ -1647,7 +1650,7 @@ for (i = 1; p != NULL; p = p->next, i++)
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */ FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
fprintf(stderr, "\n\n"); fprintf(stderr, "\n\n");
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT || if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
*mrc == PCRE2_ERROR_JIT_STACKLIMIT) *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
resource_error = TRUE; resource_error = TRUE;
if (error_count++ > 20) if (error_count++ > 20)
{ {
@ -3796,7 +3799,7 @@ for (i = 1; i < argc; i++)
/* Otherwise, deal with a single string or numeric data value. */ /* Otherwise, deal with a single string or numeric data value. */
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER && else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
op->type != OP_OP_NUMBER) op->type != OP_OP_NUMBER && op->type != OP_SIZE)
{ {
*((char **)op->dataptr) = option_data; *((char **)op->dataptr) = option_data;
} }
@ -3804,6 +3807,7 @@ for (i = 1; i < argc; i++)
{ {
unsigned long int n = decode_number(option_data, op, longop); unsigned long int n = decode_number(option_data, op, longop);
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n; if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
else *((int *)op->dataptr) = n; else *((int *)op->dataptr) = n;
} }
} }
@ -3839,6 +3843,7 @@ if (output_text != NULL &&
/* Put limits into the match data block. */ /* Put limits into the match data block. */
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit); if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit); if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);

View File

@ -588,6 +588,7 @@ static modstruct modlist[] = {
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) }, { "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) }, { "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) }, { "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) }, { "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) }, { "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) }, { "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
@ -1207,6 +1208,14 @@ are supported. */
else \ else \
pcre2_set_depth_limit_32(G(a,32),b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
pcre2_set_heap_limit_16(G(a,16),b); \
else \
pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \ #define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \ if (test_mode == PCRE8_MODE) \
pcre2_set_match_limit_8(G(a,8),b); \ pcre2_set_match_limit_8(G(a,8),b); \
@ -1643,6 +1652,12 @@ the three different cases. */
else \ else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b) G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
else \
G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \ #define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \ G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@ -1856,6 +1871,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c) pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
@ -1952,6 +1968,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c) pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
@ -2048,6 +2065,7 @@ the three different cases. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \ #define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c) pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b) #define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b) #define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b) #define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b) #define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
@ -4040,14 +4058,28 @@ if ((pat_patctl.control & CTL_INFO) != 0)
{ {
void *nametable; void *nametable;
uint8_t *start_bits; uint8_t *start_bits;
BOOL match_limit_set, depth_limit_set; BOOL heap_limit_set, match_limit_set, depth_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit, uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty, hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
match_limit, minlength, nameentrysize, namecount, newline_convention, depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
depth_limit; newline_convention;
/* These info requests may return PCRE2_ERROR_UNSET. */ /* These info requests may return PCRE2_ERROR_UNSET. */
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
{
case 0:
heap_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
heap_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE)) switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
{ {
case 0: case 0:
@ -4106,6 +4138,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0) if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (heap_limit_set)
fprintf(outfile, "Heap limit = %u\n", heap_limit);
if (match_limit_set) if (match_limit_set)
fprintf(outfile, "Match limit = %u\n", match_limit); fprintf(outfile, "Match limit = %u\n", match_limit);
@ -5353,10 +5388,15 @@ uint32_t max = UINT32_MAX;
PCRE2_SET_MATCH_LIMIT(dat_context, max); PCRE2_SET_MATCH_LIMIT(dat_context, max);
PCRE2_SET_DEPTH_LIMIT(dat_context, max); PCRE2_SET_DEPTH_LIMIT(dat_context, max);
PCRE2_SET_HEAP_LIMIT(dat_context, max);
for (;;) for (;;)
{ {
if (errnumber == PCRE2_ERROR_MATCHLIMIT) if (errnumber == PCRE2_ERROR_HEAPLIMIT)
{
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
}
else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
{ {
PCRE2_SET_MATCH_LIMIT(dat_context, mid); PCRE2_SET_MATCH_LIMIT(dat_context, mid);
} }
@ -5393,13 +5433,23 @@ for (;;)
capcount == PCRE2_ERROR_NOMATCH || capcount == PCRE2_ERROR_NOMATCH ||
capcount == PCRE2_ERROR_PARTIAL) capcount == PCRE2_ERROR_PARTIAL)
{ {
/* If we've not hit the error with a heap limit less than the size of the
initial stack frame vector, the heap is not being used, so the minimum
limit is zero; there's no need to go on. The other limits are always
greater than zero. */
if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
{
fprintf(outfile, "Minimum %s limit = 0\n", msg);
break;
}
if (mid == min + 1) if (mid == min + 1)
{ {
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
break; break;
} }
max = mid; max = mid;
mid = (min + mid)/2; mid = (min + max)/2;
} }
else break; /* Some other error */ else break; /* Some other error */
} }
@ -6662,21 +6712,33 @@ else for (gmatched = 0;; gmatched++)
(double)CLOCKS_PER_SEC); (double)CLOCKS_PER_SEC);
} }
/* Find the match and depth limits if requested. The match limit is not /* Find the heap, match and depth limits if requested. The match and heap
relevant for DFA matching and the depth limit is not relevant for JIT. */ limits are not relevant for DFA matching and the depth limit is not relevant
for JIT. */
if ((dat_datctl.control & CTL_FINDLIMITS) != 0) if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
{ {
if ((dat_datctl.control & CTL_DFA) == 0) if ((dat_datctl.control & CTL_DFA) == 0)
{
if (FLD(compiled_code, executable_jit) == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0)
{
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT,
"heap");
}
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT, capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
"match"); "match");
}
else capcount = 0; else capcount = 0;
if (FLD(compiled_code, executable_jit) == NULL || if (FLD(compiled_code, executable_jit) == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0 || (dat_datctl.options & PCRE2_NO_JIT) != 0 ||
(dat_datctl.control & CTL_DFA) != 0) (dat_datctl.control & CTL_DFA) != 0)
{
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT, capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
"depth"); "depth");
} }
}
/* Otherwise just run a single match, setting up a callout if required (the /* Otherwise just run a single match, setting up a callout if required (the
default). There is a copy of the pattern in pbuffer8 for use by callouts. */ default). There is a copy of the pattern in pbuffer8 for use by callouts. */
@ -7402,6 +7464,8 @@ printf(" \\C is supported\n");
printf(" Internal link size = %d\n", optval); printf(" Internal link size = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval); (void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
printf(" Parentheses nest limit = %d\n", optval); printf(" Parentheses nest limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
printf(" Default heap limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval); (void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
printf(" Default match limit = %d\n", optval); printf(" Default match limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval); (void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);

13
testdata/testoutput15 vendored
View File

@ -12,11 +12,13 @@ Starting code units: a z
Last code unit = 'z' Last code unit = 'z'
Subject length lower bound = 2 Subject length lower bound = 2
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
Minimum heap limit = 0
Minimum match limit = 7 Minimum match limit = 7
Minimum depth limit = 7 Minimum depth limit = 7
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz 0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaz\=find_limits aaaaaaaaaaaaaz\=find_limits
Minimum heap limit = 0
Minimum match limit = 20481 Minimum match limit = 20481
Minimum depth limit = 30 Minimum depth limit = 30
No match No match
@ -26,6 +28,7 @@ Capturing subpattern count = 1
May match empty string May match empty string
Subject length lower bound = 0 Subject length lower bound = 0
/* this is a C style comment */\=find_limits /* this is a C style comment */\=find_limits
Minimum heap limit = 0
Minimum match limit = 64 Minimum match limit = 64
Minimum depth limit = 7 Minimum depth limit = 7
0: /* this is a C style comment */ 0: /* this is a C style comment */
@ -33,21 +36,25 @@ Minimum depth limit = 7
/^(?>a)++/ /^(?>a)++/
aa\=find_limits aa\=find_limits
Minimum heap limit = 0
Minimum match limit = 5 Minimum match limit = 5
Minimum depth limit = 3 Minimum depth limit = 3
0: aa 0: aa
aaaaaaaaa\=find_limits aaaaaaaaa\=find_limits
Minimum heap limit = 0
Minimum match limit = 12 Minimum match limit = 12
Minimum depth limit = 3 Minimum depth limit = 3
0: aaaaaaaaa 0: aaaaaaaaa
/(a)(?1)++/ /(a)(?1)++/
aa\=find_limits aa\=find_limits
Minimum heap limit = 0
Minimum match limit = 7 Minimum match limit = 7
Minimum depth limit = 5 Minimum depth limit = 5
0: aa 0: aa
1: a 1: a
aaaaaaaaa\=find_limits aaaaaaaaa\=find_limits
Minimum heap limit = 0
Minimum match limit = 21 Minimum match limit = 21
Minimum depth limit = 5 Minimum depth limit = 5
0: aaaaaaaaa 0: aaaaaaaaa
@ -55,30 +62,35 @@ Minimum depth limit = 5
/a(?:.)*?a/ims /a(?:.)*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 24 Minimum match limit = 24
Minimum depth limit = 3 Minimum depth limit = 3
0: abbbbbbbbbbbbbbbbbbbbba 0: abbbbbbbbbbbbbbbbbbbbba
/a(?:.(*THEN))*?a/ims /a(?:.(*THEN))*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 66 Minimum match limit = 66
Minimum depth limit = 45 Minimum depth limit = 45
0: abbbbbbbbbbbbbbbbbbbbba 0: abbbbbbbbbbbbbbbbbbbbba
/a(?:.(*THEN:ABC))*?a/ims /a(?:.(*THEN:ABC))*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 66 Minimum match limit = 66
Minimum depth limit = 45 Minimum depth limit = 45
0: abbbbbbbbbbbbbbbbbbbbba 0: abbbbbbbbbbbbbbbbbbbbba
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/ /^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 7 Minimum match limit = 7
Minimum depth limit = 7 Minimum depth limit = 7
0: aabbccddee 0: aabbccddee
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/ /^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 12 Minimum match limit = 12
Minimum depth limit = 12 Minimum depth limit = 12
0: aabbccddee 0: aabbccddee
@ -90,6 +102,7 @@ Minimum depth limit = 12
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/ /^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
aabbccddee\=find_limits aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 10 Minimum match limit = 10
Minimum depth limit = 10 Minimum depth limit = 10
0: aabbccddee 0: aabbccddee

View File

@ -15609,7 +15609,7 @@ Last code unit = 'c'
Subject length lower bound = 4 Subject length lower bound = 4
# End of testinput2 # End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -64: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data Error -62: bad serialized data
Error -2: partial match Error -2: partial match
Error -1: no match Error -1: no match