Add explicit heap limiting options to pcre2_match(), with associated features

for listing, configuring, etc.
This commit is contained in:
Philip.Hazel 2017-04-11 11:47:25 +00:00
parent f0126dc7ae
commit 14989bd454
47 changed files with 2322 additions and 1778 deletions

View File

@ -78,6 +78,7 @@
# fix by David Gaussmann
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
# 2017-04-08 PH added HEAP_LIMIT
PROJECT(PCRE2 C)
@ -143,6 +144,9 @@ SET(PCRE2_LINK_SIZE "2" CACHE STRING
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
"Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
@ -765,6 +769,7 @@ IF(PCRE2_SHOW_REPORT)
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")

View File

@ -121,6 +121,11 @@ single-branch conditions with a false condition (e.g. DEFINE) at the start of a
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
anchored.
22. Added an explicit limit on the amount of heap used by pcre2_match(), set by
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). Upgraded pcre2test to show the
heap limit along with other pattern information, and to find the minimum when
the find_limits modifier is set.
Version 10.23 14-February-2017
------------------------------

View File

@ -69,6 +69,7 @@ dist_html_DATA = \
doc/html/pcre2_set_character_tables.html \
doc/html/pcre2_set_compile_recursion_guard.html \
doc/html/pcre2_set_depth_limit.html \
doc/html/pcre2_set_heap_limit.html \
doc/html/pcre2_set_match_limit.html \
doc/html/pcre2_set_max_pattern_length.html \
doc/html/pcre2_set_offset_limit.html \
@ -152,6 +153,7 @@ dist_man_MANS = \
doc/pcre2_set_character_tables.3 \
doc/pcre2_set_compile_recursion_guard.3 \
doc/pcre2_set_depth_limit.3 \
doc/pcre2_set_heap_limit.3 \
doc/pcre2_set_match_limit.3 \
doc/pcre2_set_max_pattern_length.3 \
doc/pcre2_set_offset_limit.3 \

23
README
View File

@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
. PCRE2 has a counter that can be set to limit the amount of computing resource
it uses when matching a pattern with the Perl-compatible matching function.
If the limit is exceeded during a match, the match fails. The default is ten
million. You can change the default by setting, for example,
--with-match-limit=500000
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
during a matching process, which in turn limits the amount of memory that is
used. This also has a default of ten million, which is essentially
during a matching process, which indirectly limits the amount of heap memory
that is used. This also has a default of ten million, which is essentially
"unlimited". You can change the default by setting, for example,
--with-match-limit-depth=5000
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() interpreter:
--with-heap-limit=500
The units are kilobytes. This limit does not apply when the JIT optimization
(which has its own memory control features) is used. There is more discussion
on the pcre2api man page (search for pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
64K bytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 17 March 2017
Last updated: 11 April 2017

View File

@ -489,7 +489,7 @@ for bmode in "$test8" "$test16" "$test32"; do
for opt in "" $jitopt; do
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
if [ $? = 0 ] ; then
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
checkresult $? 2 "$opt"
fi
done

View File

@ -36,6 +36,7 @@
#cmakedefine NEVER_BACKSLASH_C 1
#define LINK_SIZE @PCRE2_LINK_SIZE@
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@

View File

@ -263,6 +263,12 @@ AC_ARG_WITH(parens-nest-limit,
[nested parentheses limit (default=250)]),
, with_parens_nest_limit=250)
# Handle --with-heap-limit
AC_ARG_WITH(heap-limit,
AS_HELP_STRING([--with-heap-limit=N],
[default limit on heap memory (kilobytes, default=20000000)]),
, with_heap_limit=20000000)
# Handle --with-match-limit=N
AC_ARG_WITH(match-limit,
AS_HELP_STRING([--with-match-limit=N],
@ -680,12 +686,12 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
stack that is used while compiling a pattern.])
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
The value of MATCH_LIMIT determines the default number of times the internal
match() function can record a backtrack position during a single matching
attempt. There is a runtime interface for setting a different limit. The
limit exists in order to catch runaway regular expressions that take for ever
to determine that they do not match. The default is set very large so that it
does not accidentally catch legitimate cases.])
The value of MATCH_LIMIT determines the default number of times the
pcre2_match() function can record a backtrack position during a single
matching attempt. There is a runtime interface for setting a different limit.
The limit exists in order to catch runaway regular expressions that take for
ever to determine that they do not match. The default is set very large so
that it does not accidentally catch legitimate cases.])
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
@ -694,7 +700,7 @@ cat <<EOF
WARNING: --with-match-limit-recursion is an obsolete option. Please use
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
will be used.
will be used. See also --with-heap-limit.
EOF
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
@ -711,6 +717,10 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
be less than the value of MATCH_LIMIT. The default is to use the same value
as MATCH_LIMIT. There is a runtime method for setting a different limit.])
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes.])
AC_DEFINE([MAX_NAME_SIZE], [32], [
This limit is parameterized just in case anybody ever wants to
change it. Care must be taken if it is increased, because it guards
@ -971,6 +981,7 @@ $PACKAGE-$VERSION configuration summary:
Rebuild char tables ................ : ${enable_rebuild_chartables}
Internal link size ................. : ${with_link_size}
Nested parentheses limit ........... : ${with_parens_nest_limit}
Heap limit ......................... : ${with_heap_limit} kilobytes
Match limit ........................ : ${with_match_limit}
Match depth limit .................. : ${with_match_limit_depth}
Build shared libs .................. : ${enable_shared}

View File

@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
--with-parens-nest-limit=500
. PCRE2 has a counter that can be set to limit the amount of resources it uses
when matching a pattern. If the limit is exceeded during a match, the match
fails. The default is ten million. You can change the default by setting, for
example,
. PCRE2 has a counter that can be set to limit the amount of computing resource
it uses when matching a pattern with the Perl-compatible matching function.
If the limit is exceeded during a match, the match fails. The default is ten
million. You can change the default by setting, for example,
--with-match-limit=500000
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
pcre2api man page (search for pcre2_set_match_limit).
. There is a separate counter that limits the depth of nested backtracking
during a matching process, which in turn limits the amount of memory that is
used. This also has a default of ten million, which is essentially
during a matching process, which indirectly limits the amount of heap memory
that is used. This also has a default of ten million, which is essentially
"unlimited". You can change the default by setting, for example,
--with-match-limit-depth=5000
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
There is more discussion in the pcre2api man page (search for
pcre2_set_depth_limit).
. You can also set an explicit limit on the amount of heap memory used by
the pcre2_match() interpreter:
--with-heap-limit=500
The units are kilobytes. This limit does not apply when the JIT optimization
(which has its own memory control features) is used. There is more discussion
on the pcre2api man page (search for pcre2_set_heap_limit).
. In the 8-bit library, the default maximum compiled pattern size is around
64K bytes. You can increase this by adding --with-link-size=3 to the
"configure" command. PCRE2 then uses three bytes instead of two for offsets
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 17 March 2017
Last updated: 11 April 2017

View File

@ -213,6 +213,9 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr>

View File

@ -45,6 +45,7 @@ point to a uint32_t integer variable. The available codes are:
PCRE2_CONFIG_BSR Indicates what \R matches by default:
PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler

View File

@ -44,6 +44,7 @@ A match context is needed only if you want to:
<pre>
Set up a callout function
Set a matching offset limit
Change the heap memory limit
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match

View File

@ -51,6 +51,7 @@ request are as follows:
PCRE2_INFO_FRAMESIZE Size of backtracking frame
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODETYPE Type of must-be-present information

View File

@ -182,6 +182,10 @@ document for an overview of all the PCRE2 documentation.
<b> PCRE2_SIZE <i>value</i>);</b>
<br>
<br>
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
@ -793,6 +797,7 @@ A match context is required if you want to:
<pre>
Set up a callout function
Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match
@ -851,14 +856,47 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
start within the first line of the subject. If this is set with an offset
limit, a match must occur in the first line and also within the offset limit.
In other words, whichever limit comes first is used.
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
information when running an interpretive match. This limit does not apply to
matching with the JIT optimization, which has its own memory control
arrangements (see the
<a href="pcre2jit.html"><b>pcre2jit</b></a>
documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
returned. The default limit is set when PCRE2 is built; the default default is
very large and is essentially "unlimited".
</P>
<P>
A value for the heap limit may also be supplied by an item at the start of a
pattern of the form
<pre>
(*LIMIT_HEAP=ddd)
</pre>
where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
limit is set, less than the default.
</P>
<P>
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
<b> uint32_t <i>value</i>);</b>
<br>
<br>
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
up too many resources when processing patterns that are not going to match, but
which have a very large number of possibilities in their search trees. The
classic example is a pattern that uses nested unlimited repeats.
up too many computing resources when processing patterns that are not going to
match, but which have a very large number of possibilities in their search
trees. The classic example is a pattern that uses nested unlimited repeats.
</P>
<P>
There is an internal counter in <b>pcre2_match()</b> that is incremented each
@ -895,16 +933,20 @@ limit is set, less than the default.
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
Each time a nested backtracking point is passed, a new memory "frame" is used
to remember the state of matching at that point. Thus, this parameter
indirectly limits the amount of memory that is used in a match.
indirectly limits the amount of memory that is used in a match. However,
because the size of each memory "frame" depends on the number of capturing
parentheses, the actual memory limit varies from pattern to pattern. This limit
was more useful in versions before 10.30, where function recursion was used for
backtracking.
</P>
<P>
This limit is not relevant, and is ignored, when matching is done using JIT
compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which uses
it to limit the depth of internal recursive function calls that implement
lookaround assertions and pattern recursions. This is, therefore, an indirect
limit on the amount of system stack that is used. A recursive pattern such as
/(.)(?1)/, when matched to a very long string using <b>pcre2_dfa_match()</b>,
can use a great deal of stack.
The depth limit is not relevant, and is ignored, when matching is done using
JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
uses it to limit the depth of internal recursive function calls that implement
atomic groups, lookaround assertions, and pattern recursions. This is,
therefore, an indirect limit on the amount of system stack that is used. A
recursive pattern such as /(.)(?1)/, when matched to a very long string using
<b>pcre2_dfa_match()</b>, can use a great deal of stack.
</P>
<P>
The default value for the depth limit can be set when PCRE2 is built; the
@ -958,6 +1000,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
<b>pcre2_set_depth_limit()</b> above.
<pre>
PCRE2_CONFIG_HEAPLIMIT
</pre>
The output is a uint32_t integer that gives, in kilobytes, the default limit
for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
given with <b>pcre2_set_heap_limit()</b> above.
<pre>
PCRE2_CONFIG_JIT
</pre>
@ -1786,6 +1834,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
explicit match is either a literal CR or LF character, or \r or \n or one of
the equivalent hexadecimal or octal escape sequences.
<pre>
PCRE2_INFO_HEAPLIMIT
</pre>
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
should point to an unsigned 32-bit integer. If no such value has been set, the
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
<pre>
PCRE2_INFO_JCHANGED
</pre>
@ -2554,7 +2609,8 @@ The backtracking match limit was reached.
</pre>
If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default
or custom) fails.
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
if the amount of memory needed exceeds the heap limit.
<pre>
PCRE2_ERROR_NULL
</pre>
@ -3271,7 +3327,7 @@ Cambridge, England.
</P>
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
<P>
Last updated: 04 April 2017
Last updated: 11 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -265,17 +265,41 @@ to the <b>configure</b> command. This setting has no effect on the
(though the counting is done differently).
</P>
<P>
In some environments it is desirable to limit the depth of nested backtracking
in order to restrict the maximum amount of heap memory that is used. A second
limit controls this; it defaults to the value that is set for
--with-match-limit. You can set a lower default limit by adding, for example,
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
stack to record backtracking points. The more nested backtracking points there
are (that is, the deeper the search tree), the more memory is needed. If the
initial vector is not large enough, heap memory is used, up to a certain limit,
which is specified in kilobytes. The limit can be changed at run time, as
described in the
<a href="pcre2api.html"><b>pcre2api</b></a>
documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as
<pre>
--with-heap-limit=500
</pre>
which limits the amount of heap to 500 kilobytes. This limit applies only to
interpretive matching in pcre2_match(). It does not apply when JIT (which has
its own memory arrangements) is used, nor does it apply to
<b>pcre2_dfa_match()</b>.
</P>
<P>
You can also explicitly limit the depth of nested backtracking in the
<b>pcre2_match()</b> interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding, for
example,
<pre>
--with-match-limit-depth=10000
</pre>
to the <b>configure</b> command. This value can also be overridden at run time.
As well as applying to <b>pcre2_match()</b>, this limit also controls the depth
of recursive function calls in <b>pcre2_dfa_match()</b>. These are used for
lookaround assertions, atomic groups, and recursion within patterns.
to the <b>configure</b> command. This value can be overridden at run time. This
depth limit indirectly limits the amount of heap memory that is used, but
because the size of each backtracking "frame" depends on the number of
capturing parentheses in a pattern, the amount of heap that is used before the
limit is reached varies from pattern to pattern. This limit was more useful in
versions before 10.30, where function recursion was used for backtracking.
However, as well as applying to <b>pcre2_match()</b>, this limit also controls
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
</P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P>
@ -530,7 +554,7 @@ Cambridge, England.
</P>
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
<P>
Last updated: 31 March 2017
Last updated: 10 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -404,6 +404,10 @@ file name is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name.
</P>
<P>
<b>--heap-limit</b>=<i>number</i>
See <b>--match-limit</b> below.
</P>
<P>
<b>--help</b>
Output a help message, giving brief details of the command options and file
type support, and then exit. Anything else on the command line is
@ -505,7 +509,7 @@ used. There is no short form for this option.
<b>--match-limit</b>=<i>number</i>
Processing some regular expression patterns may take a very long time to search
for all possible matching strings. Others may require a very large amount of
memory. There are two options that set resource limits for matching.
memory. There are three options that set resource limits for matching.
<br>
<br>
The <b>--match-limit</b> option provides a means of limiting computing resource
@ -516,13 +520,24 @@ counter that is incremented each time around its main processing loop. If the
value set by <b>--match-limit</b> is reached, an error occurs.
<br>
<br>
The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
of heap memory that may be used for matching. Heap memory is needed only if
matching the pattern requires a significant number of nested backtracking
points to be remembered. This parameter can be set to zero to forbid the use of
heap memory altogether.
<br>
<br>
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
which in turn limits the amount of memory that is used. This limit is of use
only if it is set smaller than <b>--match-limit</b>.
which indirectly limits the amount of memory that is used. The amount of memory
needed for each backtracking point depends on the number of capturing
parentheses in the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than <b>--match-limit</b>.
<br>
<br>
There are no short forms for these options. The default settings are specified
when the PCRE2 library is compiled, with the default default being 10 million.
when the PCRE2 library is compiled, with the default defaults being very large
and so effectively unlimited.
</P>
<P>
<b>--max-buffer-size</b>=<i>number</i>
@ -764,11 +779,12 @@ Many of the short and long forms of <b>pcre2grep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
<b>--only-matching</b> option with a capturing parentheses number.
<b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
<b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
<b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
<b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
capturing parentheses number.
</P>
<P>
Although most of the common options work the same way, a few are different in
@ -891,9 +907,9 @@ there are more than 20 such errors, <b>pcre2grep</b> gives up.
</P>
<P>
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
overall resource limit; there is a second option called <b>--depth-limit</b>
that sets a limit on the amount of memory that is used (see the discussion of
these options above).
overall resource limit. There are also other limits that affect the amount of
memory used during matching; see the discussion of <b>--heap-limit</b> and
<b>--depth-limit</b> above.
</P>
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
<P>
@ -918,7 +934,7 @@ Cambridge, England.
</P>
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
<P>
Last updated: 06 April 2017
Last updated: 11 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -170,14 +170,15 @@ the application to apply the JIT optimization by calling
<b>pcre2_jit_compile()</b> is ignored.
</P>
<br><b>
Setting match and backtracking depth limits
Setting match resource limits
</b><br>
<P>
The pcre2_match() function contains a counter that is incremented every time it
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
this counter, which therefore limits the amount of computing resource used for
a match. The maximum depth of nested backtracking can also be limited, and this
restricts the amount of heap memory that is used.
a match. The maximum depth of nested backtracking can also be limited; this
indirectly restricts the amount of heap memory that is used, but there is also
an explicit memory limit that can be set.
</P>
<P>
These facilities are provided to catch runaway matches that are provoked by
@ -186,6 +187,7 @@ unlimited repeats applied to a long string that does not match). When one of
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
can also be set by items at the start of the pattern of the form
<pre>
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d)
</pre>
@ -200,11 +202,13 @@ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility.
</P>
<P>
The match limit is used (but in a different way) when JIT is being used, but it
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
However, the depth limit is relevant for DFA matching, which uses function
recursion for recursions within the pattern. In this case, the depth limit
controls the amount of system stack that is used.
The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
for matching. It does not apply to JIT or DFA matching. The match limit is used
(but in a different way) when JIT is being used, but it is not relevant, and is
ignored, when matching with <b>pcre2_dfa_match()</b>. The depth limit is ignored
by JIT but is relevant for DFA matching, which uses function recursion for
recursions within the pattern. In this case, the depth limit controls the
amount of system stack that is used.
<a name="newlines"></a></P>
<br><b>
Newline conventions
@ -3434,7 +3438,7 @@ Cambridge, England.
</P>
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
<P>
Last updated: 03 April 2017
Last updated: 11 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -83,11 +83,12 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly
remembered in memory frames controlled by the code. An initial 10K vector of
frames is allocated on the system stack (enough for about 50 frames for small
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
to be time-efficient, as described below, may also reduce the memory
requirements.
remembered in memory frames controlled by the code. An initial 20K vector of
frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. The amount of heap
memory can be limited; if the limit is set to zero, only the initial stack
vector is used. Rewriting patterns to be time-efficient, as described below,
may also reduce the memory requirements.
</P>
<P>
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
@ -243,7 +244,7 @@ Cambridge, England.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 31 March 2017
Last updated: 08 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -235,6 +235,12 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
compilation, each pattern is passed to the just-in-time compiler, if available.
</P>
<P>
<b>-jitverify</b>
Behave as if each pattern line has the <b>jitverify</b> modifier; after
successful compilation, each pattern is passed to the just-in-time compiler, if
available, and the use of JIT is verified.
</P>
<P>
<b>-pattern</b> <i>modifier-list</i>
Behave as if each pattern line contains the given modifiers.
</P>
@ -1088,6 +1094,7 @@ pattern.
get=&#60;number or name&#62; extract captured substring
getall extract all captured substrings
/g global global matching
heap_limit=&#60;n&#62; set a limit on heap memory
jitstack=&#60;n&#62; set size of JIT stack
mark show mark values
match_limit=&#60;n&#62; set a match limit
@ -1330,11 +1337,11 @@ stack that is larger than the default 32K is necessary only for very
complicated patterns.
</P>
<br><b>
Setting match and depth limits
Setting heap, match, and depth limits
</b><br>
<P>
The <b>match_limit</b> and <b>depth_limit</b> modifiers set the appropriate
limits in the match context. These values are ignored when the
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
the appropriate limits in the match context. These values are ignored when the
<b>find_limits</b> modifier is specified.
</P>
<br><b>
@ -1343,8 +1350,8 @@ Finding minimum limits
<P>
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
calls the relevant matching function several times, setting different values in
the match context via <b>pcre2_set_match_limit()</b> or
<b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
the match context via <b>pcre2_set_heap_limit()</b>, <b>pcre2_set_match_limit()</b>,
or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
parameter that allows the match to complete without error.
</P>
<P>
@ -1360,8 +1367,8 @@ increasing length of subject string.
</P>
<P>
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
much memory for recording backtracking points is needed to complete the match
attempt. In the case of DFA matching, <i>depth_limit</i> controls the depth of
much nested backtracking happens (that is, how deeply the pattern's tree is
searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups.
</P>
@ -1800,7 +1807,7 @@ Cambridge, England.
</P>
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
<P>
Last updated: 04 April 2017
Last updated: 11 April 2017
<br>
Copyright &copy; 1997-2017 University of Cambridge.
<br>

View File

@ -213,6 +213,9 @@ in the library.
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking depth limit</td></tr>
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
<td>&nbsp;&nbsp;Set the match backtracking heap limit</td></tr>
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
<td>&nbsp;&nbsp;Set the match limit</td></tr>

View File

@ -283,6 +283,9 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
int pcre2_set_offset_limit(pcre2_match_context *mcontext,
PCRE2_SIZE value);
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
uint32_t value);
int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value);
@ -840,6 +843,7 @@ PCRE2 CONTEXTS
Set up a callout function
Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match
@ -896,14 +900,44 @@ PCRE2 CONTEXTS
also within the offset limit. In other words, whichever limit comes
first is used.
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
uint32_t value);
The heap_limit parameter specifies, in units of kilobytes, the maximum
amount of heap memory that pcre2_match() may use to hold backtracking
information when running an interpretive match. This limit does not
apply to matching with the JIT optimization, which has its own memory
control arrangements (see the pcre2jit documentation for more details),
nor does it apply to pcre2_dfa_match(). If the limit is reached, the
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
limit is set when PCRE2 is built; the default default is very large and
is essentially "unlimited".
A value for the heap limit may also be supplied by an item at the start
of a pattern of the form
(*LIMIT_HEAP=ddd)
where ddd is a decimal number. However, such a setting is ignored
unless ddd is less than the limit set by the caller of pcre2_match()
or, if no such limit is set, less than the default.
The pcre2_match() function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking
points there are (that is, the deeper the search tree), the more memory
is needed. Heap memory is used only if the initial vector is too
small. If the heap limit is set to a value less than 21 (in particular,
zero) no heap memory will be used. In this case, only patterns that do
not have a lot of nested backtracking can be successfully processed.
int pcre2_set_match_limit(pcre2_match_context *mcontext,
uint32_t value);
The match_limit parameter provides a means of preventing PCRE2 from
using up too many resources when processing patterns that are not going
to match, but which have a very large number of possibilities in their
search trees. The classic example is a pattern that uses nested unlim-
ited repeats.
using up too many computing resources when processing patterns that are
not going to match, but which have a very large number of possibilities
in their search trees. The classic example is a pattern that uses
nested unlimited repeats.
There is an internal counter in pcre2_match() that is incremented each
time round its main matching loop. If this value reaches the match
@ -938,15 +972,19 @@ PCRE2 CONTEXTS
pcre2_match(). Each time a nested backtracking point is passed, a new
memory "frame" is used to remember the state of matching at that point.
Thus, this parameter indirectly limits the amount of memory that is
used in a match.
used in a match. However, because the size of each memory "frame"
depends on the number of capturing parentheses, the actual memory limit
varies from pattern to pattern. This limit was more useful in versions
before 10.30, where function recursion was used for backtracking.
This limit is not relevant, and is ignored, when matching is done using
JIT compiled code. However, it is supported by pcre2_dfa_match(), which
uses it to limit the depth of internal recursive function calls that
implement lookaround assertions and pattern recursions. This is, there-
fore, an indirect limit on the amount of system stack that is used. A
recursive pattern such as /(.)(?1)/, when matched to a very long string
using pcre2_dfa_match(), can use a great deal of stack.
The depth limit is not relevant, and is ignored, when matching is done
using JIT compiled code. However, it is supported by pcre2_dfa_match(),
which uses it to limit the depth of internal recursive function calls
that implement atomic groups, lookaround assertions, and pattern recur-
sions. This is, therefore, an indirect limit on the amount of system
stack that is used. A recursive pattern such as /(.)(?1)/, when matched
to a very long string using pcre2_dfa_match(), can use a great deal of
stack.
The default value for the depth limit can be set when PCRE2 is built;
the default default is the same value as the default for the match
@ -999,6 +1037,12 @@ CHECKING BUILD-TIME OPTIONS
recursions and lookarounds in pcre2_dfa_match(). Further details are
given with pcre2_set_depth_limit() above.
PCRE2_CONFIG_HEAPLIMIT
The output is a uint32_t integer that gives, in kilobytes, the default
limit for the amount of heap memory used by pcre2_match(). Further
details are given with pcre2_set_heap_limit() above.
PCRE2_CONFIG_JIT
The output is a uint32_t integer that is set to one if support for
@ -1803,6 +1847,14 @@ INFORMATION ABOUT A COMPILED PATTERN
\r or \n or one of the equivalent hexadecimal or octal escape
sequences.
PCRE2_INFO_HEAPLIMIT
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
ment should point to an unsigned 32-bit integer. If no such value has
been set, the call to pcre2_pattern_info() returns the error
PCRE2_ERROR_UNSET.
PCRE2_INFO_JCHANGED
Return 1 if the (?J) or (?-J) option setting is used in the pattern,
@ -2517,7 +2569,9 @@ ERROR RETURNS FROM pcre2_match()
If a pattern contains many nested backtracking points, heap memory is
used to remember them. This error is given when the memory allocation
function (default or custom) fails.
function (default or custom) fails. Note that a different error,
PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
the heap limit.
PCRE2_ERROR_NULL
@ -3187,7 +3241,7 @@ AUTHOR
REVISION
Last updated: 04 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
@ -3427,19 +3481,40 @@ LIMITING PCRE2 RESOURCE USAGE
pcre2_dfa_match() matching function, but it does also limit JIT match-
ing (though the counting is done differently).
In some environments it is desirable to limit the depth of nested back-
tracking in order to restrict the maximum amount of heap memory that is
used. A second limit controls this; it defaults to the value that is
set for --with-match-limit. You can set a lower default limit by
adding, for example,
The pcre2_match() function starts out using a 20K vector on the system
stack to record backtracking points. The more nested backtracking
points there are (that is, the deeper the search tree), the more memory
is needed. If the initial vector is not large enough, heap memory is
used, up to a certain limit, which is specified in kilobytes. The limit
can be changed at run time, as described in the pcre2api documentation.
The default limit (in effect unlimited) is 20 million. You can change
this by a setting such as
--with-heap-limit=500
which limits the amount of heap to 500 kilobytes. This limit applies
only to interpretive matching in pcre2_match(). It does not apply when
JIT (which has its own memory arrangements) is used, nor does it apply
to pcre2_dfa_match().
You can also explicitly limit the depth of nested backtracking in the
pcre2_match() interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding,
for example,
--with-match-limit-depth=10000
to the configure command. This value can also be overridden at run
time. As well as applying to pcre2_match(), this limit also controls
the depth of recursive function calls in pcre2_dfa_match(). These are
used for lookaround assertions, atomic groups, and recursion within
patterns.
to the configure command. This value can be overridden at run time.
This depth limit indirectly limits the amount of heap memory that is
used, but because the size of each backtracking "frame" depends on the
number of capturing parentheses in a pattern, the amount of heap that
is used before the limit is reached varies from pattern to pattern.
This limit was more useful in versions before 10.30, where function
recursion was used for backtracking. However, as well as applying to
pcre2_match(), this limit also controls the depth of recursive function
calls in pcre2_dfa_match(). These are used for lookaround assertions,
atomic groups, and recursion within patterns. The limit does not apply
to JIT matching.
CREATING CHARACTER TABLES AT BUILD TIME
@ -3701,7 +3776,7 @@ AUTHOR
REVISION
Last updated: 31 March 2017
Last updated: 10 April 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
@ -5522,14 +5597,15 @@ SPECIAL START-OF-PATTERN ITEMS
attempt by the application to apply the JIT optimization by calling
pcre2_jit_compile() is ignored.
Setting match and backtracking depth limits
Setting match resource limits
The pcre2_match() function contains a counter that is incremented every
time it goes round its main loop. The caller of pcre2_match() can set a
limit on this counter, which therefore limits the amount of computing
resource used for a match. The maximum depth of nested backtracking can
also be limited, and this restricts the amount of heap memory that is
used.
also be limited; this indirectly restricts the amount of heap memory
that is used, but there is also an explicit memory limit that can be
set.
These facilities are provided to catch runaway matches that are pro-
voked by patterns with huge matching trees (a typical example is a pat-
@ -5538,6 +5614,7 @@ SPECIAL START-OF-PATTERN ITEMS
error return. The limits can also be set by items at the start of the
pattern of the form
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d)
@ -5551,12 +5628,13 @@ SPECIAL START-OF-PATTERN ITEMS
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
name is still recognized for backwards compatibility.
The match limit is used (but in a different way) when JIT is being
used, but it is not relevant, and is ignored, when matching with
pcre2_dfa_match(). However, the depth limit is relevant for DFA match-
ing, which uses function recursion for recursions within the pattern.
In this case, the depth limit controls the amount of system stack that
is used.
The heap limit applies only when the pcre2_match() interpreter is used
for matching. It does not apply to JIT or DFA matching. The match limit
is used (but in a different way) when JIT is being used, but it is not
relevant, and is ignored, when matching with pcre2_dfa_match(). The
depth limit is ignored by JIT but is relevant for DFA matching, which
uses function recursion for recursions within the pattern. In this
case, the depth limit controls the amount of system stack that is used.
Newline conventions
@ -8480,7 +8558,7 @@ AUTHOR
REVISION
Last updated: 03 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------
@ -8557,10 +8635,12 @@ STACK AND HEAP USAGE AT RUN TIME
sive function calls could use a great deal of stack, and this could
cause problems, but this usage has been eliminated. Backtracking posi-
tions are now explicitly remembered in memory frames controlled by the
code. An initial 10K vector of frames is allocated on the system stack
(enough for about 50 frames for small patterns), but if this is insuf-
ficient, heap memory is used. Rewriting patterns to be time-efficient,
as described below, may also reduce the memory requirements.
code. An initial 20K vector of frames is allocated on the system stack
(enough for about 100 frames for small patterns), but if this is insuf-
ficient, heap memory is used. The amount of heap memory can be limited;
if the limit is set to zero, only the initial stack vector is used.
Rewriting patterns to be time-efficient, as described below, may also
reduce the memory requirements.
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
function calls, but only for processing atomic groups, lookaround
@ -8706,7 +8786,7 @@ AUTHOR
REVISION
Last updated: 31 March 2017
Last updated: 08 April 2017
Copyright (c) 1997-2017 University of Cambridge.
------------------------------------------------------------------------------

View File

@ -1,4 +1,4 @@
.TH PCRE2_CONFIG 3 "24 March 2017" "PCRE2 10.30"
.TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@ -31,6 +31,7 @@ point to a uint32_t integer variable. The available codes are:
PCRE2_CONFIG_BSR Indicates what \eR matches by default:
PCRE2_BSR_UNICODE
PCRE2_BSR_ANYCRLF
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
.\" JOIN
PCRE2_CONFIG_JIT Availability of just-in-time compiler

View File

@ -1,4 +1,4 @@
.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30"
.TH PCRE2_MATCH 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@ -32,6 +32,7 @@ A match context is needed only if you want to:
.sp
Set up a callout function
Set a matching offset limit
Change the heap memory limit
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match

View File

@ -1,4 +1,4 @@
.TH PCRE2_PATTERN_INFO 3 "25 March 2017" "PCRE2 10.30"
.TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
@ -43,6 +43,9 @@ request are as follows:
.\" JOIN
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
exist in the pattern
.\" JOIN
PCRE2_INFO_HEAPLIMIT Heap memory limit if set,
otherwise PCRE2_ERROR_UNSET
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
PCRE2_INFO_LASTCODETYPE Type of must-be-present information

View File

@ -0,0 +1,28 @@
.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH SYNOPSIS
.rs
.sp
.B #include <pcre2.h>
.PP
.nf
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.fi
.
.SH DESCRIPTION
.rs
.sp
This function sets the backtracking heap limit field in a match context. The
result is always zero.
.P
There is a complete description of the PCRE2 native API in the
.\" HREF
\fBpcre2api\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcre2posix\fP
.\"
page.

View File

@ -1,4 +1,4 @@
.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30"
.TH PCRE2API 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.sp
@ -123,6 +123,9 @@ document for an overview of all the PCRE2 documentation.
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
.B " PCRE2_SIZE \fIvalue\fP);"
.sp
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.sp
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.sp
@ -753,6 +756,7 @@ A match context is required if you want to:
.sp
Set up a callout function
Set an offset limit for matching an unanchored pattern
Change the limit on the amount of heap used when matching
Change the backtracking match limit
Change the backtracking depth limit
Set custom memory management specifically for the match
@ -816,14 +820,49 @@ limit, a match must occur in the first line and also within the offset limit.
In other words, whichever limit comes first is used.
.sp
.nf
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.fi
.sp
The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
information when running an interpretive match. This limit does not apply to
matching with the JIT optimization, which has its own memory control
arrangements (see the
.\" HREF
\fBpcre2jit\fP
.\"
documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
returned. The default limit is set when PCRE2 is built; the default default is
very large and is essentially "unlimited".
.P
A value for the heap limit may also be supplied by an item at the start of a
pattern of the form
.sp
(*LIMIT_HEAP=ddd)
.sp
where ddd is a decimal number. However, such a setting is ignored unless ddd is
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
limit is set, less than the default.
.P
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
stack for recording backtracking points. The more nested backtracking points
there are (that is, the deeper the search tree), the more memory is needed.
Heap memory is used only if the initial vector is too small. If the heap limit
is set to a value less than 21 (in particular, zero) no heap memory will be
used. In this case, only patterns that do not have a lot of nested backtracking
can be successfully processed.
.sp
.nf
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
.B " uint32_t \fIvalue\fP);"
.fi
.sp
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
up too many resources when processing patterns that are not going to match, but
which have a very large number of possibilities in their search trees. The
classic example is a pattern that uses nested unlimited repeats.
up too many computing resources when processing patterns that are not going to
match, but which have a very large number of possibilities in their search
trees. The classic example is a pattern that uses nested unlimited repeats.
.P
There is an internal counter in \fBpcre2_match()\fP that is incremented each
time round its main matching loop. If this value reaches the match limit,
@ -859,15 +898,19 @@ limit is set, less than the default.
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
Each time a nested backtracking point is passed, a new memory "frame" is used
to remember the state of matching at that point. Thus, this parameter
indirectly limits the amount of memory that is used in a match.
indirectly limits the amount of memory that is used in a match. However,
because the size of each memory "frame" depends on the number of capturing
parentheses, the actual memory limit varies from pattern to pattern. This limit
was more useful in versions before 10.30, where function recursion was used for
backtracking.
.P
This limit is not relevant, and is ignored, when matching is done using JIT
compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which uses
it to limit the depth of internal recursive function calls that implement
lookaround assertions and pattern recursions. This is, therefore, an indirect
limit on the amount of system stack that is used. A recursive pattern such as
/(.)(?1)/, when matched to a very long string using \fBpcre2_dfa_match()\fP,
can use a great deal of stack.
The depth limit is not relevant, and is ignored, when matching is done using
JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
uses it to limit the depth of internal recursive function calls that implement
atomic groups, lookaround assertions, and pattern recursions. This is,
therefore, an indirect limit on the amount of system stack that is used. A
recursive pattern such as /(.)(?1)/, when matched to a very long string using
\fBpcre2_dfa_match()\fP, can use a great deal of stack.
.P
The default value for the depth limit can be set when PCRE2 is built; the
default default is the same value as the default for the match limit. If the
@ -921,6 +964,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
\fBpcre2_set_depth_limit()\fP above.
.sp
PCRE2_CONFIG_HEAPLIMIT
.sp
The output is a uint32_t integer that gives, in kilobytes, the default limit
for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
given with \fBpcre2_set_heap_limit()\fP above.
.sp
PCRE2_CONFIG_JIT
.sp
@ -1784,6 +1833,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The third argument should point to a \fBuint32_t\fP variable. An
explicit match is either a literal CR or LF character, or \er or \en or one of
the equivalent hexadecimal or octal escape sequences.
.sp
PCRE2_INFO_HEAPLIMIT
.sp
If the pattern set a heap memory limit by including an item of the form
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
should point to an unsigned 32-bit integer. If no such value has been set, the
call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET.
.sp
PCRE2_INFO_JCHANGED
.sp
@ -2603,7 +2659,8 @@ The backtracking match limit was reached.
.sp
If a pattern contains many nested backtracking points, heap memory is used to
remember them. This error is given when the memory allocation function (default
or custom) fails.
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
if the amount of memory needed exceeds the heap limit.
.sp
PCRE2_ERROR_NULL
.sp
@ -3322,6 +3379,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 04 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2BUILD 3 "31 March 2017" "PCRE2 10.30"
.TH PCRE2BUILD 3 "10 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.
@ -260,17 +260,42 @@ to the \fBconfigure\fP command. This setting has no effect on the
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
(though the counting is done differently).
.P
In some environments it is desirable to limit the depth of nested backtracking
in order to restrict the maximum amount of heap memory that is used. A second
limit controls this; it defaults to the value that is set for
--with-match-limit. You can set a lower default limit by adding, for example,
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
stack to record backtracking points. The more nested backtracking points there
are (that is, the deeper the search tree), the more memory is needed. If the
initial vector is not large enough, heap memory is used, up to a certain limit,
which is specified in kilobytes. The limit can be changed at run time, as
described in the
.\" HREF
\fBpcre2api\fP
.\"
documentation. The default limit (in effect unlimited) is 20 million. You can
change this by a setting such as
.sp
--with-heap-limit=500
.sp
which limits the amount of heap to 500 kilobytes. This limit applies only to
interpretive matching in \fBpcre2_match()\fP. It does not apply when JIT (which has
its own memory arrangements) is used, nor does it apply to
\fBpcre2_dfa_match()\fP.
.P
You can also explicitly limit the depth of nested backtracking in the
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
for --with-match-limit. You can set a lower default limit by adding, for
example,
.sp
--with-match-limit-depth=10000
.sp
to the \fBconfigure\fP command. This value can also be overridden at run time.
As well as applying to \fBpcre2_match()\fP, this limit also controls the depth
of recursive function calls in \fBpcre2_dfa_match()\fP. These are used for
lookaround assertions, atomic groups, and recursion within patterns.
to the \fBconfigure\fP command. This value can be overridden at run time. This
depth limit indirectly limits the amount of heap memory that is used, but
because the size of each backtracking "frame" depends on the number of
capturing parentheses in a pattern, the amount of heap that is used before the
limit is reached varies from pattern to pattern. This limit was more useful in
versions before 10.30, where function recursion was used for backtracking.
However, as well as applying to \fBpcre2_match()\fP, this limit also controls
the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are
used for lookaround assertions, atomic groups, and recursion within patterns.
The limit does not apply to JIT matching.
.
.
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
@ -547,6 +572,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 31 March 2017
Last updated: 10 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
.TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
.SH NAME
pcre2grep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
@ -347,6 +347,9 @@ file names are shown when multiple files are searched. For matching lines, the
file name is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name.
.TP
\fB--heap-limit\fP=\fInumber\fP
See \fB--match-limit\fP below.
.TP
\fB--help\fP
Output a help message, giving brief details of the command options and file
type support, and then exit. Anything else on the command line is
@ -436,7 +439,7 @@ used. There is no short form for this option.
\fB--match-limit\fP=\fInumber\fP
Processing some regular expression patterns may take a very long time to search
for all possible matching strings. Others may require a very large amount of
memory. There are two options that set resource limits for matching.
memory. There are three options that set resource limits for matching.
.sp
The \fB--match-limit\fP option provides a means of limiting computing resource
usage when processing patterns that are not going to match, but which have a
@ -445,12 +448,22 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
counter that is incremented each time around its main processing loop. If the
value set by \fB--match-limit\fP is reached, an error occurs.
.sp
The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
of heap memory that may be used for matching. Heap memory is needed only if
matching the pattern requires a significant number of nested backtracking
points to be remembered. This parameter can be set to zero to forbid the use of
heap memory altogether.
.sp
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
which in turn limits the amount of memory that is used. This limit is of use
only if it is set smaller than \fB--match-limit\fP.
which indirectly limits the amount of memory that is used. The amount of memory
needed for each backtracking point depends on the number of capturing
parentheses in the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of use only if it is
set smaller than \fB--match-limit\fP.
.sp
There are no short forms for these options. The default settings are specified
when the PCRE2 library is compiled, with the default default being 10 million.
when the PCRE2 library is compiled, with the default defaults being very large
and so effectively unlimited.
.TP
\fB--max-buffer-size=\fInumber\fP
This limits the expansion of the processing buffer, whose initial size can be
@ -670,11 +683,12 @@ Many of the short and long forms of \fBpcre2grep\fP's options are the same
as in the GNU \fBgrep\fP program. Any long option of the form
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
\fB--only-matching\fP option with a capturing parentheses number.
\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
\fB--output\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
capturing parentheses number.
.P
Although most of the common options work the same way, a few are different in
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
@ -799,9 +813,9 @@ message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, \fBpcre2grep\fP gives up.
.P
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
overall resource limit; there is a second option called \fB--depth-limit\fP
that sets a limit on the amount of memory that is used (see the discussion of
these options above).
overall resource limit. There are also other limits that affect the amount of
memory used during matching; see the discussion of \fB--heap-limit\fP and
\fB--depth-limit\fP above.
.
.
.SH DIAGNOSTICS
@ -834,6 +848,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 06 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -383,6 +383,9 @@ OPTIONS
colon; for context lines, a hyphen separator is used. If a
line number is also being output, it follows the file name.
--heap-limit=number
See --match-limit below.
--help Output a help message, giving brief details of the command
options and file type support, and then exit. Anything else
on the command line is ignored.
@ -482,7 +485,7 @@ OPTIONS
--match-limit=number
Processing some regular expression patterns may take a very
long time to search for all possible matching strings. Others
may require a very large amount of memory. There are two
may require a very large amount of memory. There are three
options that set resource limits for matching.
The --match-limit option provides a means of limiting comput-
@ -494,14 +497,25 @@ OPTIONS
processing loop. If the value set by --match-limit is
reached, an error occurs.
The --heap-limit option specifies, as a number of kilobytes,
the amount of heap memory that may be used for matching. Heap
memory is needed only if matching the pattern requires a sig-
nificant number of nested backtracking points to be remem-
bered. This parameter can be set to zero to forbid the use of
heap memory altogether.
The --depth-limit option limits the depth of nested back-
tracking points, which in turn limits the amount of memory
that is used. This limit is of use only if it is set smaller
than --match-limit.
tracking points, which indirectly limits the amount of memory
that is used. The amount of memory needed for each backtrack-
ing point depends on the number of capturing parentheses in
the pattern, so the amount of memory that is used before this
limit acts varies from pattern to pattern. This limit is of
use only if it is set smaller than --match-limit.
There are no short forms for these options. The default set-
tings are specified when the PCRE2 library is compiled, with
the default default being 10 million.
the default defaults being very large and so effectively
unlimited.
--max-buffer-size=number
This limits the expansion of the processing buffer, whose
@ -748,11 +762,11 @@ OPTIONS COMPATIBILITY
Many of the short and long forms of pcre2grep's options are the same as
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
line, --om-separator, --output, -u, and --utf-8 options are specific to
pcre2grep, as is the use of the --only-matching option with a capturing
parentheses number.
ever, the --depth-limit, --file-list, --file-offsets, --heap-limit,
--include-dir, --line-offsets, --locale, --match-limit, -M, --multi-
line, -N, --newline, --om-separator, --output, -u, and --utf-8 options
are specific to pcre2grep, as is the use of the --only-matching option
with a capturing parentheses number.
Although most of the common options work the same way, a few are dif-
ferent in pcre2grep. For example, the --include option's argument is a
@ -873,9 +887,9 @@ MATCHING ERRORS
such errors, pcre2grep gives up.
The --match-limit option of pcre2grep can be used to set the overall
resource limit; there is a second option called --depth-limit that sets
a limit on the amount of memory that is used (see the discussion of
these options above).
resource limit. There are also other limits that affect the amount of
memory used during matching; see the discussion of --heap-limit and
--depth-limit above.
DIAGNOSTICS
@ -901,5 +915,5 @@ AUTHOR
REVISION
Last updated: 06 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.

View File

@ -1,4 +1,4 @@
.TH PCRE2PATTERN 3 "03 April 2017" "PCRE2 10.30"
.TH PCRE2PATTERN 3 "11 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
@ -138,14 +138,15 @@ the application to apply the JIT optimization by calling
\fBpcre2_jit_compile()\fP is ignored.
.
.
.SS "Setting match and backtracking depth limits"
.SS "Setting match resource limits"
.rs
.sp
The \fBpcre2_match()\fP function contains a counter that is incremented every time it
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
this counter, which therefore limits the amount of computing resource used for
a match. The maximum depth of nested backtracking can also be limited, and this
restricts the amount of heap memory that is used.
a match. The maximum depth of nested backtracking can also be limited; this
indirectly restricts the amount of heap memory that is used, but there is also
an explicit memory limit that can be set.
.P
These facilities are provided to catch runaway matches that are provoked by
patterns with huge matching trees (a typical example is a pattern with nested
@ -153,6 +154,7 @@ unlimited repeats applied to a long string that does not match). When one of
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
can also be set by items at the start of the pattern of the form
.sp
(*LIMIT_HEAP=d)
(*LIMIT_MATCH=d)
(*LIMIT_DEPTH=d)
.sp
@ -165,11 +167,13 @@ setting of one of these limits, the lower value is used.
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
still recognized for backwards compatibility.
.P
The match limit is used (but in a different way) when JIT is being used, but it
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
However, the depth limit is relevant for DFA matching, which uses function
recursion for recursions within the pattern. In this case, the depth limit
controls the amount of system stack that is used.
The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
for matching. It does not apply to JIT or DFA matching. The match limit is used
(but in a different way) when JIT is being used, but it is not relevant, and is
ignored, when matching with \fBpcre2_dfa_match()\fP. The depth limit is ignored
by JIT but is relevant for DFA matching, which uses function recursion for
recursions within the pattern. In this case, the depth limit controls the
amount of system stack that is used.
.
.
.\" HTML <a name="newlines"></a>
@ -3465,6 +3469,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 03 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2PERFORM 3 "31 March 2017" "PCRE2 10.30"
.TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
.SH NAME
PCRE2 - Perl-compatible regular expressions (revised API)
.SH "PCRE2 PERFORMANCE"
@ -69,11 +69,12 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
uses very little system stack at run time. In earlier releases recursive
function calls could use a great deal of stack, and this could cause problems,
but this usage has been eliminated. Backtracking positions are now explicitly
remembered in memory frames controlled by the code. An initial 10K vector of
frames is allocated on the system stack (enough for about 50 frames for small
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
to be time-efficient, as described below, may also reduce the memory
requirements.
remembered in memory frames controlled by the code. An initial 20K vector of
frames is allocated on the system stack (enough for about 100 frames for small
patterns), but if this is insufficient, heap memory is used. The amount of heap
memory can be limited; if the limit is set to zero, only the initial stack
vector is used. Rewriting patterns to be time-efficient, as described below,
may also reduce the memory requirements.
.P
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
function calls, but only for processing atomic groups, lookaround assertions,
@ -231,6 +232,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 31 March 2017
Last updated: 08 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -1,4 +1,4 @@
.TH PCRE2TEST 1 "08 April 2017" "PCRE 10.30"
.TH PCRE2TEST 1 "11 April 2017" "PCRE2 10.30"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@ -1063,6 +1063,7 @@ pattern.
get=<number or name> extract captured substring
getall extract all captured substrings
/g global global matching
heap_limit=<n> set a limit on heap memory
jitstack=<n> set size of JIT stack
mark show mark values
match_limit=<n> set a match limit
@ -1293,11 +1294,11 @@ stack that is larger than the default 32K is necessary only for very
complicated patterns.
.
.
.SS "Setting match and depth limits"
.SS "Setting heap, match, and depth limits"
.rs
.sp
The \fBmatch_limit\fP and \fBdepth_limit\fP modifiers set the appropriate
limits in the match context. These values are ignored when the
The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
the appropriate limits in the match context. These values are ignored when the
\fBfind_limits\fP modifier is specified.
.
.
@ -1306,8 +1307,8 @@ limits in the match context. These values are ignored when the
.sp
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
calls the relevant matching function several times, setting different values in
the match context via \fBpcre2_set_match_limit()\fP or
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
the match context via \fBpcre2_set_heap_limit()\fP, \fBpcre2_set_match_limit()\fP,
or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
parameter that allows the match to complete without error.
.P
If JIT is being used, only the match limit is relevant. If DFA matching is
@ -1320,8 +1321,8 @@ numbers of matching possibilities, it can become large very quickly with
increasing length of subject string.
.P
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
much memory for recording backtracking points is needed to complete the match
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of
much nested backtracking happens (that is, how deeply the pattern's tree is
searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
recursive calls of the internal function that is used for handling pattern
recursion, lookaround assertions, and atomic groups.
.
@ -1782,6 +1783,6 @@ Cambridge, England.
.rs
.sp
.nf
Last updated: 08 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.
.fi

View File

@ -185,6 +185,12 @@ COMMAND LINE OPTIONS
successful compilation, each pattern is passed to the just-
in-time compiler, if available.
-jitverify
Behave as if each pattern line has the jitverify modifier;
after successful compilation, each pattern is passed to the
just-in-time compiler, if available, and the use of JIT is
verified.
-pattern modifier-list
Behave as if each pattern line contains the given modifiers.
@ -972,6 +978,7 @@ SUBJECT MODIFIERS
get=<number or name> extract captured substring
getall extract all captured substrings
/g global global matching
heap_limit=<n> set a limit on heap memory
jitstack=<n> set size of JIT stack
mark show mark values
match_limit=<n> set a match limit
@ -1196,19 +1203,20 @@ SUBJECT MODIFIERS
Providing a stack that is larger than the default 32K is necessary only
for very complicated patterns.
Setting match and depth limits
Setting heap, match, and depth limits
The match_limit and depth_limit modifiers set the appropriate limits in
the match context. These values are ignored when the find_limits modi-
fier is specified.
The heap_limit, match_limit, and depth_limit modifiers set the appro-
priate limits in the match context. These values are ignored when the
find_limits modifier is specified.
Finding minimum limits
If the find_limits modifier is present on a subject line, pcre2test
calls the relevant matching function several times, setting different
values in the match context via pcre2_set_match_limit() or
pcre2_set_depth_limit() until it finds the minimum values for each
parameter that allows the match to complete without error.
values in the match context via pcre2_set_heap_limit(),
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
minimum values for each parameter that allows the match to complete
without error.
If JIT is being used, only the match limit is relevant. If DFA matching
is being used, only the depth limit is relevant.
@ -1220,8 +1228,8 @@ SUBJECT MODIFIERS
quickly with increasing length of subject string.
For non-DFA matching, the minimum depth_limit number is a measure of
how much memory for recording backtracking points is needed to complete
the match attempt. In the case of DFA matching, depth_limit controls
how much nested backtracking happens (that is, how deeply the pattern's
tree is searched). In the case of DFA matching, depth_limit controls
the depth of recursive calls of the internal function that is used for
handling pattern recursion, lookaround assertions, and atomic groups.
@ -1632,5 +1640,5 @@ AUTHOR
REVISION
Last updated: 04 April 2017
Last updated: 11 April 2017
Copyright (c) 1997-2017 University of Cambridge.

View File

@ -132,6 +132,10 @@ sure both macros are undefined; an emulation function will then be used. */
/* Define to 1 if you have the <zlib.h> header file. */
#undef HAVE_ZLIB_H
/* This limits the amount of memory that pcre2_match() may use while matching
a pattern. The value is in kilobytes. */
#undef HEAP_LIMIT
/* The value of LINK_SIZE determines the number of bytes used to store links
as offsets within the compiled regex. The default is 2, which allows for
compiled patterns up to 64K long. This covers the vast majority of cases.
@ -143,7 +147,7 @@ sure both macros are undefined; an emulation function will then be used. */
#undef LT_OBJDIR
/* The value of MATCH_LIMIT determines the default number of times the
internal match() function can record a backtrack position during a single
pcre2_match() function can record a backtrack position during a single
matching attempt. There is a runtime interface for setting a different
limit. The limit exists in order to catch runaway regular expressions that
take for ever to determine that they do not match. The default is set very

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
/* Request types for pcre2_pattern_info() */
@ -297,6 +298,7 @@ numbers must not be changed. */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */
@ -313,6 +315,7 @@ numbers must not be changed. */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)

View File

@ -268,6 +268,7 @@ numbers must not be changed. */
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
#define PCRE2_ERROR_HEAPLIMIT (-63)
/* Request types for pcre2_pattern_info() */
@ -297,6 +298,7 @@ numbers must not be changed. */
#define PCRE2_INFO_SIZE 22
#define PCRE2_INFO_HASBACKSLASHC 23
#define PCRE2_INFO_FRAMESIZE 24
#define PCRE2_INFO_HEAPLIMIT 25
/* Request types for pcre2_config(). */
@ -313,6 +315,7 @@ numbers must not be changed. */
#define PCRE2_CONFIG_UNICODE 9
#define PCRE2_CONFIG_UNICODE_VERSION 10
#define PCRE2_CONFIG_VERSION 11
#define PCRE2_CONFIG_HEAPLIMIT 12
/* Types for code units in patterns and subject strings. */
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
int (*)(pcre2_callout_block *, void *), void *); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)

View File

@ -727,6 +727,7 @@ enum { PSO_OPT, /* Value is an option bit */
PSO_FLG, /* Value is a flag bit */
PSO_NL, /* Value is a newline type */
PSO_BSR, /* Value is a \R type */
PSO_LIMH, /* Read integer value for heap limit */
PSO_LIMM, /* Read integer value for match limit */
PSO_LIMD }; /* Read integer value for depth limit */
@ -749,6 +750,7 @@ static pso pso_list[] = {
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
{ (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
@ -8853,6 +8855,7 @@ uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */
uint32_t skipatstart; /* When checking (*UTF) etc */
uint32_t limit_heap = UINT32_MAX;
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
uint32_t limit_depth = UINT32_MAX;
@ -9026,6 +9029,7 @@ while (patlen - skipatstart >= 2 &&
case PSO_LIMM:
case PSO_LIMD:
case PSO_LIMH:
c = 0;
pp = skipatstart;
if (!IS_DIGIT(ptr[pp]))
@ -9045,7 +9049,8 @@ while (patlen - skipatstart >= 2 &&
ptr += pp;
goto HAD_EARLY_ERROR;
}
if (p->type == PSO_LIMM) limit_match = c;
if (p->type == PSO_LIMH) limit_heap = c;
else if (p->type == PSO_LIMM) limit_match = c;
else limit_depth = c;
skipatstart += pp - skipatstart;
break;
@ -9288,6 +9293,7 @@ re->magic_number = MAGIC_NUMBER;
re->compile_options = options;
re->overall_options = cb.external_options;
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
re->limit_heap = limit_heap;
re->limit_match = limit_match;
re->limit_depth = limit_depth;
re->first_codeunit = 0;

View File

@ -84,6 +84,7 @@ if (where == NULL) /* Requests a length */
return PCRE2_ERROR_BADOPTION;
case PCRE2_CONFIG_BSR:
case PCRE2_CONFIG_HEAPLIMIT:
case PCRE2_CONFIG_JIT:
case PCRE2_CONFIG_LINKSIZE:
case PCRE2_CONFIG_MATCHLIMIT:
@ -116,6 +117,10 @@ switch (what)
#endif
break;
case PCRE2_CONFIG_HEAPLIMIT:
*((uint32_t *)where) = HEAP_LIMIT;
break;
case PCRE2_CONFIG_JIT:
#ifdef SUPPORT_JIT
*((uint32_t *)where) = 1;

View File

@ -168,6 +168,7 @@ const pcre2_match_context PRIV(default_match_context) = {
NULL,
NULL,
PCRE2_UNSET, /* Offset limit */
HEAP_LIMIT,
MATCH_LIMIT,
MATCH_LIMIT_DEPTH };
@ -346,6 +347,13 @@ mcontext->callout_data = callout_data;
return 0;
}
/* Set the heap memory limit in a match context. The value is stored unchanged
in the context's heap_limit field; no validation is performed here. The
matching code treats the limit as a number of kilobytes (it compares it with a
byte count divided by 1024).

Arguments:
  mcontext   the match context to update
  limit      the new heap limit

Returns:     0 always
*/

PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
{
mcontext->heap_limit = limit;
return 0;
}
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
{

View File

@ -256,6 +256,7 @@ static const unsigned char match_error_texts[] =
"match with end before start is not supported\0"
"too many replacements (more than INT_MAX)\0"
"bad serialized data\0"
"heap limit exceeded\0"
;

View File

@ -240,6 +240,16 @@ not rely on this. */
#define COMPILE_ERROR_BASE 100
/* The initial frames vector for remembering backtracking points in
pcre2_match() is allocated on the system stack, of this size (bytes). The size
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
on the number of capturing parentheses) so 20K handles quite a few frames. A
larger vector on the heap is obtained for patterns that need more frames. The
maximum size of this can be limited. */
#define START_FRAMES_SIZE 20480
/* Define the default BSR convention. */
#ifdef BSR_ANYCRLF
@ -922,6 +932,7 @@ a positive value. */
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
@ -1196,6 +1207,7 @@ only. */
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN

View File

@ -585,6 +585,7 @@ typedef struct pcre2_real_match_context {
int (*callout)(pcre2_callout_block *, void *);
void *callout_data;
PCRE2_SIZE offset_limit;
uint32_t heap_limit;
uint32_t match_limit;
uint32_t depth_limit;
} pcre2_real_match_context;
@ -614,6 +615,7 @@ typedef struct pcre2_real_code {
uint32_t compile_options; /* Options passed to pcre2_compile() */
uint32_t overall_options; /* Options after processing the pattern */
uint32_t flags; /* Various state flags */
uint32_t limit_heap; /* Limit set in the pattern */
uint32_t limit_match; /* Limit set in the pattern */
uint32_t limit_depth; /* Limit set in the pattern */
uint32_t first_codeunit; /* Starting code unit */
@ -808,9 +810,10 @@ typedef struct match_block {
heapframe *match_frames; /* Points to vector of frames */
heapframe *match_frames_top; /* Points after the end of the vector */
heapframe *stack_frames; /* The original vector on the stack */
uint32_t match_call_count; /* Number of times a new frame is created */
PCRE2_SIZE heap_limit; /* As it says */
uint32_t match_limit; /* As it says */
uint32_t match_limit_depth; /* As it says */
uint32_t match_call_count; /* Number of times a new frame is created */
BOOL hitend; /* Hit the end of the subject at some point */
BOOL hasthen; /* Pattern contains (*THEN) */
const uint8_t *lcc; /* Points to lower casing table */

View File

@ -64,15 +64,6 @@ information, and fields within it. */
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
/* The initial frames vector for remembering backtracking points is allocated
on the system stack, of this size (bytes). The size must be a multiple of
sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best.
Typical frame sizes are a few hundred bytes (it depends on the number of
capturing parentheses) so 10K handles quite a few frames. A larger vector on
the heap is obtained for patterns that need more frames. */
#define START_FRAMES_SIZE 10240
/* Masks for identifying the public options that are permitted at match time. */
#define PUBLIC_MATCH_OPTIONS \
@ -618,14 +609,22 @@ backtracking point. */
MATCH_RECURSE:
/* Set up a new backtracking frame. If the vector is full, get a new one
on the heap, doubling the size. */
on the heap, doubling the size, but constrained by the heap limit. */
N = (heapframe *)((char *)F + frame_size);
if (N >= mb->match_frames_top)
{
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
heapframe *new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
heapframe *new;
/* The heap limit is in kilobytes. If doubling the vector would exceed it,
clamp the new size to the largest whole number of frames that fits within the
limit. If the current vector is already at least that large, no growth is
possible, so give up. The test must be ">=" rather than "==": with a small
limit (zero forbids heap use altogether) the current vector can be the initial
stack vector, which is bigger than maxsize, and continuing would copy
frame_vector_size bytes into a smaller (possibly zero-length) allocation. */

if ((newsize / 1024) > mb->heap_limit)
  {
  PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
  if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
  newsize = maxsize;
  }
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(new, mb->match_frames, mb->frame_vector_size);
@ -6266,9 +6265,22 @@ correct when calling match() more than once for non-anchored patterns. */
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
/* Limits set in the pattern override the match context only if they are
smaller. */
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
mcontext->heap_limit : re->limit_heap;
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* If a pattern has very many capturing parentheses, the frame size may be very
large. Ensure that there are at least 10 available frames by getting an initial
vector on the heap if necessary. */
vector on the heap if necessary, except when the heap limit prevents this. Get
fewer if possible. (The heap limit is in kilobytes.) */
if (frame_size <= START_FRAMES_SIZE/10)
{
@ -6278,6 +6290,11 @@ if (frame_size <= START_FRAMES_SIZE/10)
else
{
mb->frame_vector_size = frame_size * 10;
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
{
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
}
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
mb->memctl.memory_data);
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
@ -6292,14 +6309,6 @@ to avoid uninitialized memory read errors when it is copied to a new frame. */
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
/* Limits set in the pattern override the match context only if they are
smaller. */
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* Pointers to the individual character tables */
mb->lcc = re->tables + lcc_offset;

View File

@ -80,6 +80,7 @@ if (where == NULL) /* Requests field length */
case PCRE2_INFO_FIRSTCODEUNIT:
case PCRE2_INFO_HASBACKSLASHC:
case PCRE2_INFO_HASCRORLF:
case PCRE2_INFO_HEAPLIMIT:
case PCRE2_INFO_JCHANGED:
case PCRE2_INFO_LASTCODETYPE:
case PCRE2_INFO_LASTCODEUNIT:
@ -171,6 +172,11 @@ switch(what)
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
break;
case PCRE2_INFO_HEAPLIMIT:
*((uint32_t *)where) = re->limit_heap;
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
break;
case PCRE2_INFO_JCHANGED:
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
break;

View File

@ -212,6 +212,7 @@ static const uint8_t *character_tables = NULL;
static uint32_t pcre2_options = 0;
static uint32_t process_options = 0;
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
static uint32_t match_limit = 0;
static uint32_t depth_limit = 0;
@ -330,7 +331,7 @@ static const char *incexname[4] = { "--include", "--exclude",
/* Structure for options and list of them */
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
typedef struct option_item {
@ -356,16 +357,17 @@ used to identify them. */
#define N_LOFFSETS (-10)
#define N_FOFFSETS (-11)
#define N_LBUFFER (-12)
#define N_M_LIMIT (-13)
#define N_M_LIMIT_DEP (-14)
#define N_BUFSIZE (-15)
#define N_NOJIT (-16)
#define N_FILE_LIST (-17)
#define N_BINARY_FILES (-18)
#define N_EXCLUDE_FROM (-19)
#define N_INCLUDE_FROM (-20)
#define N_OM_SEPARATOR (-21)
#define N_MAX_BUFSIZE (-22)
#define N_H_LIMIT (-13)
#define N_M_LIMIT (-14)
#define N_M_LIMIT_DEP (-15)
#define N_BUFSIZE (-16)
#define N_NOJIT (-17)
#define N_FILE_LIST (-18)
#define N_BINARY_FILES (-19)
#define N_EXCLUDE_FROM (-20)
#define N_INCLUDE_FROM (-21)
#define N_OM_SEPARATOR (-22)
#define N_MAX_BUFSIZE (-23)
static option_item optionlist[] = {
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
@ -397,6 +399,7 @@ static option_item optionlist[] = {
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
@ -525,9 +528,9 @@ pcre2grep_exit(int rc)
{
if (resource_error)
{
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
PCRE2_ERROR_DEPTHLIMIT);
fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
"limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
}
exit(rc);
@ -1647,7 +1650,7 @@ for (i = 1; p != NULL; p = p->next, i++)
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
fprintf(stderr, "\n\n");
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
*mrc == PCRE2_ERROR_JIT_STACKLIMIT)
*mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
resource_error = TRUE;
if (error_count++ > 20)
{
@ -3796,7 +3799,7 @@ for (i = 1; i < argc; i++)
/* Otherwise, deal with a single string or numeric data value. */
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
op->type != OP_OP_NUMBER)
op->type != OP_OP_NUMBER && op->type != OP_SIZE)
{
*((char **)op->dataptr) = option_data;
}
@ -3804,6 +3807,7 @@ for (i = 1; i < argc; i++)
{
unsigned long int n = decode_number(option_data, op, longop);
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
else *((int *)op->dataptr) = n;
}
}
@ -3839,6 +3843,7 @@ if (output_text != NULL &&
/* Put limits into the match data block. */
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);

View File

@ -588,6 +588,7 @@ static modstruct modlist[] = {
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
@ -1207,6 +1208,14 @@ are supported. */
else \
pcre2_set_depth_limit_32(G(a,32),b)
/* Set the heap limit b on context a, choosing the 8-bit, 16-bit or 32-bit
library function according to the run-time value of test_mode (any value other
than PCRE8_MODE or PCRE16_MODE falls through to the 32-bit call). The expansion
is a bare if/else chain whose final call has no terminating semicolon: the
invocation supplies the semicolon, and an invocation that is itself the body of
an if should be wrapped in braces so that a following else is not captured.
G(a,8) etc. presumably select the width-specific variant of a; G is defined
elsewhere - confirm. */
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_heap_limit_8(G(a,8),b); \
else if (test_mode == PCRE16_MODE) \
pcre2_set_heap_limit_16(G(a,16),b); \
else \
pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == PCRE8_MODE) \
pcre2_set_match_limit_8(G(a,8),b); \
@ -1643,6 +1652,12 @@ the three different cases. */
else \
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
/* Two-width form of the heap limit setter. If test_mode equals the mode
constant built from BITONE, the BITONE library function is called; any other
value of test_mode falls through to the BITTWO function. BITONE and BITTWO are
defined elsewhere (presumably the two code unit widths being supported -
confirm). As with the other setters, the expansion is an unbraced if/else with
no trailing semicolon, so the caller supplies the semicolon. */
#define PCRE2_SET_HEAP_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
else \
G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) \
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
@ -1856,6 +1871,7 @@ the three different cases. */
/* The wrappers below forward directly to the _8 library functions, with no
test_mode dispatch; each expands to a single function call expression.
G(a,8) presumably selects the 8-bit variant of a (G is defined elsewhere -
confirm). */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
@ -1952,6 +1968,7 @@ the three different cases. */
/* The wrappers below forward directly to the _16 library functions, with no
test_mode dispatch; each expands to a single function call expression.
G(a,16) presumably selects the 16-bit variant of a (G is defined elsewhere -
confirm). */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
@ -2048,6 +2065,7 @@ the three different cases. */
/* The wrappers below forward directly to the _32 library functions, with no
test_mode dispatch; each expands to a single function call expression.
G(a,32) presumably selects the 32-bit variant of a (G is defined elsewhere -
confirm). */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
@ -4040,14 +4058,28 @@ if ((pat_patctl.control & CTL_INFO) != 0)
{
void *nametable;
uint8_t *start_bits;
BOOL match_limit_set, depth_limit_set;
BOOL heap_limit_set, match_limit_set, depth_limit_set;
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
match_limit, minlength, nameentrysize, namecount, newline_convention,
depth_limit;
depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
newline_convention;
/* These info requests may return PCRE2_ERROR_UNSET. */
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
{
case 0:
heap_limit_set = TRUE;
break;
case PCRE2_ERROR_UNSET:
heap_limit_set = FALSE;
break;
default:
return PR_ABEND;
}
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
{
case 0:
@ -4106,6 +4138,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
if (maxlookbehind > 0)
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
if (heap_limit_set)
fprintf(outfile, "Heap limit = %u\n", heap_limit);
if (match_limit_set)
fprintf(outfile, "Match limit = %u\n", match_limit);
@ -5353,10 +5388,15 @@ uint32_t max = UINT32_MAX;
PCRE2_SET_MATCH_LIMIT(dat_context, max);
PCRE2_SET_DEPTH_LIMIT(dat_context, max);
PCRE2_SET_HEAP_LIMIT(dat_context, max);
for (;;)
{
if (errnumber == PCRE2_ERROR_MATCHLIMIT)
if (errnumber == PCRE2_ERROR_HEAPLIMIT)
{
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
}
else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
{
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
}
@ -5393,13 +5433,23 @@ for (;;)
capcount == PCRE2_ERROR_NOMATCH ||
capcount == PCRE2_ERROR_PARTIAL)
{
/* If we've not hit the error with a heap limit less than the size of the
initial stack frame vector, the heap is not being used, so the minimum
limit is zero; there's no need to go on. The other limits are always
greater than zero. */
if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
{
fprintf(outfile, "Minimum %s limit = 0\n", msg);
break;
}
if (mid == min + 1)
{
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
break;
}
max = mid;
mid = (min + mid)/2;
mid = (min + max)/2;
}
else break; /* Some other error */
}
@ -6662,21 +6712,33 @@ else for (gmatched = 0;; gmatched++)
(double)CLOCKS_PER_SEC);
}
/* Find the match and depth limits if requested. The match limit is not
relevant for DFA matching and the depth limit is not relevant for JIT. */
/* Find the heap, match and depth limits if requested. The match and heap
limits are not relevant for DFA matching and the depth limit is not relevant
for JIT. */
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
{
if ((dat_datctl.control & CTL_DFA) == 0)
{
if (FLD(compiled_code, executable_jit) == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0)
{
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT,
"heap");
}
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
"match");
}
else capcount = 0;
if (FLD(compiled_code, executable_jit) == NULL ||
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
(dat_datctl.control & CTL_DFA) != 0)
{
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
"depth");
}
}
/* Otherwise just run a single match, setting up a callout if required (the
default). There is a copy of the pattern in pbuffer8 for use by callouts. */
@ -7402,6 +7464,8 @@ printf(" \\C is supported\n");
printf(" Internal link size = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
printf(" Parentheses nest limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
printf(" Default heap limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
printf(" Default match limit = %d\n", optval);
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);

13
testdata/testoutput15 vendored
View File

@ -12,11 +12,13 @@ Starting code units: a z
Last code unit = 'z'
Subject length lower bound = 2
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
Minimum heap limit = 0
Minimum match limit = 7
Minimum depth limit = 7
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaz\=find_limits
Minimum heap limit = 0
Minimum match limit = 20481
Minimum depth limit = 30
No match
@ -26,6 +28,7 @@ Capturing subpattern count = 1
May match empty string
Subject length lower bound = 0
/* this is a C style comment */\=find_limits
Minimum heap limit = 0
Minimum match limit = 64
Minimum depth limit = 7
0: /* this is a C style comment */
@ -33,21 +36,25 @@ Minimum depth limit = 7
/^(?>a)++/
aa\=find_limits
Minimum heap limit = 0
Minimum match limit = 5
Minimum depth limit = 3
0: aa
aaaaaaaaa\=find_limits
Minimum heap limit = 0
Minimum match limit = 12
Minimum depth limit = 3
0: aaaaaaaaa
/(a)(?1)++/
aa\=find_limits
Minimum heap limit = 0
Minimum match limit = 7
Minimum depth limit = 5
0: aa
1: a
aaaaaaaaa\=find_limits
Minimum heap limit = 0
Minimum match limit = 21
Minimum depth limit = 5
0: aaaaaaaaa
@ -55,30 +62,35 @@ Minimum depth limit = 5
/a(?:.)*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 24
Minimum depth limit = 3
0: abbbbbbbbbbbbbbbbbbbbba
/a(?:.(*THEN))*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 66
Minimum depth limit = 45
0: abbbbbbbbbbbbbbbbbbbbba
/a(?:.(*THEN:ABC))*?a/ims
abbbbbbbbbbbbbbbbbbbbba\=find_limits
Minimum heap limit = 0
Minimum match limit = 66
Minimum depth limit = 45
0: abbbbbbbbbbbbbbbbbbbbba
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 7
Minimum depth limit = 7
0: aabbccddee
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 12
Minimum depth limit = 12
0: aabbccddee
@ -90,6 +102,7 @@ Minimum depth limit = 12
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
aabbccddee\=find_limits
Minimum heap limit = 0
Minimum match limit = 10
Minimum depth limit = 10
0: aabbccddee

View File

@ -15609,7 +15609,7 @@ Last code unit = 'c'
Subject length lower bound = 4
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Error -2: partial match
Error -1: no match