Add explicit heap limiting options to pcre2_match(), with associated features
for listing, configuring, etc.
This commit is contained in:
parent
f0126dc7ae
commit
14989bd454
|
@ -78,6 +78,7 @@
|
|||
# fix by David Gaussmann
|
||||
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||
# 2017-04-08 PH added HEAP_LIMIT
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -143,6 +144,9 @@ SET(PCRE2_LINK_SIZE "2" CACHE STRING
|
|||
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
|
||||
"Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
|
@ -765,6 +769,7 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
|
||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||
MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
|
||||
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||
|
|
|
@ -121,6 +121,11 @@ single-branch conditions with a false condition (e.g. DEFINE) at the start of a
|
|||
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
|
||||
anchored.
|
||||
|
||||
22. Added an explicit limit on the amount of heap used by pcre2_match(), set by
|
||||
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). Upgraded pcre2test to show the
|
||||
heap limit along with other pattern information, and to find the minimum when
|
||||
the find_limits modifier is set.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
|
@ -69,6 +69,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_depth_limit.html \
|
||||
doc/html/pcre2_set_heap_limit.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_max_pattern_length.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
|
@ -152,6 +153,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_depth_limit.3 \
|
||||
doc/pcre2_set_heap_limit.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_max_pattern_length.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
|
|
23
README
23
README
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||
it uses when matching a pattern with the Perl-compatible matching function.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
|
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
|
|||
pcre2api man page (search for pcre2_set_match_limit).
|
||||
|
||||
. There is a separate counter that limits the depth of nested backtracking
|
||||
during a matching process, which in turn limits the amount of memory that is
|
||||
used. This also has a default of ten million, which is essentially
|
||||
during a matching process, which indirectly limits the amount of heap memory
|
||||
that is used. This also has a default of ten million, which is essentially
|
||||
"unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-depth=5000
|
||||
|
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
|
|||
There is more discussion in the pcre2api man page (search for
|
||||
pcre2_set_depth_limit).
|
||||
|
||||
. You can also set an explicit limit on the amount of heap memory used by
|
||||
the pcre2_match() interpreter:
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||
(which has its own memory control features) is used. There is more discussion
|
||||
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 17 March 2017
|
||||
Last updated: 11 April 2017
|
||||
|
|
2
RunTest
2
RunTest
|
@ -489,7 +489,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
fi
|
||||
done
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
|
|
25
configure.ac
25
configure.ac
|
@ -263,6 +263,12 @@ AC_ARG_WITH(parens-nest-limit,
|
|||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-heap-limit=N
|
||||
AC_ARG_WITH(heap-limit,
|
||||
AS_HELP_STRING([--with-heap-limit=N],
|
||||
[default limit on heap memory (kilobytes, default=20000000)]),
|
||||
, with_heap_limit=20000000)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
|
@ -680,12 +686,12 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
|||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the internal
|
||||
match() function can record a backtrack position during a single matching
|
||||
attempt. There is a runtime interface for setting a different limit. The
|
||||
limit exists in order to catch runaway regular expressions that take for ever
|
||||
to determine that they do not match. The default is set very large so that it
|
||||
does not accidentally catch legitimate cases.])
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take for
|
||||
ever to determine that they do not match. The default is set very large so
|
||||
that it does not accidentally catch legitimate cases.])
|
||||
|
||||
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||
|
||||
|
@ -694,7 +700,7 @@ cat <<EOF
|
|||
|
||||
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||
will be used.
|
||||
will be used. See also --with-heap-limit.
|
||||
|
||||
EOF
|
||||
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||
|
@ -711,6 +717,10 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
|||
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||
as MATCH_LIMIT. There is a runtime method for setting a different limit.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||
This limits the amount of memory that pcre2_match() may use while matching
|
||||
a pattern. The value is in kilobytes.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
|
@ -971,6 +981,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||
Internal link size ................. : ${with_link_size}
|
||||
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||
Heap limit ......................... : ${with_heap_limit} kilobytes
|
||||
Match limit ........................ : ${with_match_limit}
|
||||
Match depth limit .................. : ${with_match_limit_depth}
|
||||
Build shared libs .................. : ${enable_shared}
|
||||
|
|
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||
it uses when matching a pattern with the Perl-compatible matching function.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
|
@ -235,8 +235,8 @@ library. They are also documented in the pcre2build man page.
|
|||
pcre2api man page (search for pcre2_set_match_limit).
|
||||
|
||||
. There is a separate counter that limits the depth of nested backtracking
|
||||
during a matching process, which in turn limits the amount of memory that is
|
||||
used. This also has a default of ten million, which is essentially
|
||||
during a matching process, which indirectly limits the amount of heap memory
|
||||
that is used. This also has a default of ten million, which is essentially
|
||||
"unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-depth=5000
|
||||
|
@ -244,6 +244,15 @@ library. They are also documented in the pcre2build man page.
|
|||
There is more discussion in the pcre2api man page (search for
|
||||
pcre2_set_depth_limit).
|
||||
|
||||
. You can also set an explicit limit on the amount of heap memory used by
|
||||
the pcre2_match() interpreter:
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||
(which has its own memory control features) is used. There is more discussion
|
||||
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||
"configure" command. PCRE2 then uses three bytes instead of two for offsets
|
||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 17 March 2017
|
||||
Last updated: 11 April 2017
|
||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
|
||||
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
|
||||
|
|
|
@ -44,6 +44,7 @@ A match context is needed only if you want to:
|
|||
<pre>
|
||||
Set up a callout function
|
||||
Set a matching offset limit
|
||||
Change the heap memory limit
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
|
|
@ -51,6 +51,7 @@ request are as follows:
|
|||
PCRE2_INFO_FRAMESIZE Size of backtracking frame
|
||||
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
|
||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
|
||||
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||
|
|
|
@ -182,6 +182,10 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -793,6 +797,7 @@ A match context is required if you want to:
|
|||
<pre>
|
||||
Set up a callout function
|
||||
Set an offset limit for matching an unanchored pattern
|
||||
Change the limit on the amount of heap used when matching
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
@ -851,14 +856,47 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
|
|||
start within the first line of the subject. If this is set with an offset
|
||||
limit, a match must occur in the first line and also within the offset limit.
|
||||
In other words, whichever limit comes first is used.
|
||||
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
|
||||
amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
|
||||
information when running an interpretive match. This limit does not apply to
|
||||
matching with the JIT optimization, which has its own memory control
|
||||
arrangements (see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
|
||||
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||
returned. The default limit is set when PCRE2 is built; the default default is
|
||||
very large and is essentially "unlimited".
|
||||
</P>
|
||||
<P>
|
||||
A value for the heap limit may also be supplied by an item at the start of a
|
||||
pattern of the form
|
||||
<pre>
|
||||
(*LIMIT_HEAP=ddd)
|
||||
</pre>
|
||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||
limit is set, less than the default.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||
stack for recording backtracking points. The more nested backtracking points
|
||||
there are (that is, the deeper the search tree), the more memory is needed.
|
||||
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||
can be successfully processed.
|
||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
|
||||
up too many resources when processing patterns that are not going to match, but
|
||||
which have a very large number of possibilities in their search trees. The
|
||||
classic example is a pattern that uses nested unlimited repeats.
|
||||
up too many computing resources when processing patterns that are not going to
|
||||
match, but which have a very large number of possibilities in their search
|
||||
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||
</P>
|
||||
<P>
|
||||
There is an internal counter in <b>pcre2_match()</b> that is incremented each
|
||||
|
@ -895,16 +933,20 @@ limit is set, less than the default.
|
|||
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||
to remember the state of matching at that point. Thus, this parameter
|
||||
indirectly limits the amount of memory that is used in a match.
|
||||
indirectly limits the amount of memory that is used in a match. However,
|
||||
because the size of each memory "frame" depends on the number of capturing
|
||||
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||
was more useful in versions before 10.30, where function recursion was used for
|
||||
backtracking.
|
||||
</P>
|
||||
<P>
|
||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
||||
compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which uses
|
||||
it to limit the depth of internal recursive function calls that implement
|
||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
||||
limit on the amount of system stack that is used. A recursive pattern such as
|
||||
/(.)(?1)/, when matched to a very long string using <b>pcre2_dfa_match()</b>,
|
||||
can use a great deal of stack.
|
||||
The depth limit is not relevant, and is ignored, when matching is done using
|
||||
JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
|
||||
uses it to limit the depth of internal recursive function calls that implement
|
||||
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||
therefore, an indirect limit on the amount of system stack that is used. A
|
||||
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||
<b>pcre2_dfa_match()</b>, can use a great deal of stack.
|
||||
</P>
|
||||
<P>
|
||||
The default value for the depth limit can be set when PCRE2 is built; the
|
||||
|
@ -958,6 +1000,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
|||
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
|
||||
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
|
||||
<b>pcre2_set_depth_limit()</b> above.
|
||||
<pre>
|
||||
PCRE2_CONFIG_HEAPLIMIT
|
||||
</pre>
|
||||
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||
for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
|
||||
given with <b>pcre2_set_heap_limit()</b> above.
|
||||
<pre>
|
||||
PCRE2_CONFIG_JIT
|
||||
</pre>
|
||||
|
@ -1786,6 +1834,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
|||
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
|
||||
explicit match is either a literal CR or LF character, or \r or \n or one of
|
||||
the equivalent hexadecimal or octal escape sequences.
|
||||
<pre>
|
||||
PCRE2_INFO_HEAPLIMIT
|
||||
</pre>
|
||||
If the pattern set a heap memory limit by including an item of the form
|
||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
||||
<pre>
|
||||
PCRE2_INFO_JCHANGED
|
||||
</pre>
|
||||
|
@ -2554,7 +2609,8 @@ The backtracking match limit was reached.
|
|||
</pre>
|
||||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails.
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit.
|
||||
<pre>
|
||||
PCRE2_ERROR_NULL
|
||||
</pre>
|
||||
|
@ -3271,7 +3327,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -265,17 +265,41 @@ to the <b>configure</b> command. This setting has no effect on the
|
|||
(though the counting is done differently).
|
||||
</P>
|
||||
<P>
|
||||
In some environments it is desirable to limit the depth of nested backtracking
|
||||
in order to restrict the maximum amount of heap memory that is used. A second
|
||||
limit controls this; it defaults to the value that is set for
|
||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
||||
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||
stack to record backtracking points. The more nested backtracking points there
|
||||
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||
which is specified in kilobytes. The limit can be changed at run time, as
|
||||
described in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation. The default limit (in effect unlimited) is 20 million. You can
|
||||
change this by a setting such as
|
||||
<pre>
|
||||
--with-heap-limit=500
|
||||
</pre>
|
||||
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||
interpretive matching in <b>pcre2_match()</b>. It does not apply when JIT (which has
|
||||
its own memory arrangements) is used, nor does it apply to
|
||||
<b>pcre2_dfa_match()</b>.
|
||||
</P>
|
||||
<P>
|
||||
You can also explicitly limit the depth of nested backtracking in the
|
||||
<b>pcre2_match()</b> interpreter. This limit defaults to the value that is set
|
||||
for --with-match-limit. You can set a lower default limit by adding, for
|
||||
example,
|
||||
<pre>
|
||||
--with-match-limit-depth=10000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||
As well as applying to <b>pcre2_match()</b>, this limit also controls the depth
|
||||
of recursive function calls in <b>pcre2_dfa_match()</b>. These are used for
|
||||
lookaround assertions, atomic groups, and recursion within patterns.
|
||||
to the <b>configure</b> command. This value can be overridden at run time. This
|
||||
depth limit indirectly limits the amount of heap memory that is used, but
|
||||
because the size of each backtracking "frame" depends on the number of
|
||||
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||
versions before 10.30, where function recursion was used for backtracking.
|
||||
However, as well as applying to <b>pcre2_match()</b>, this limit also controls
|
||||
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||
The limit does not apply to JIT matching.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
|
@ -530,7 +554,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 10 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -404,6 +404,10 @@ file name is followed by a colon; for context lines, a hyphen separator is used.
|
|||
If a line number is also being output, it follows the file name.
|
||||
</P>
|
||||
<P>
|
||||
<b>--heap-limit</b>=<i>number</i>
|
||||
See <b>--match-limit</b> below.
|
||||
</P>
|
||||
<P>
|
||||
<b>--help</b>
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit. Anything else on the command line is
|
||||
|
@ -505,7 +509,7 @@ used. There is no short form for this option.
|
|||
<b>--match-limit</b>=<i>number</i>
|
||||
Processing some regular expression patterns may take a very long time to search
|
||||
for all possible matching strings. Others may require a very large amount of
|
||||
memory. There are two options that set resource limits for matching.
|
||||
memory. There are three options that set resource limits for matching.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--match-limit</b> option provides a means of limiting computing resource
|
||||
|
@ -516,13 +520,24 @@ counter that is incremented each time around its main processing loop. If the
|
|||
value set by <b>--match-limit</b> is reached, an error occurs.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
|
||||
of heap memory that may be used for matching. Heap memory is needed only if
|
||||
matching the pattern requires a significant number of nested backtracking
|
||||
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
||||
which in turn limits the amount of memory that is used. This limit is of use
|
||||
only if it is set smaller than <b>--match-limit</b>.
|
||||
which indirectly limits the amount of memory that is used. The amount of memory
|
||||
needed for each backtracking point depends on the number of capturing
|
||||
parentheses in the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||
set smaller than <b>--match-limit</b>.
|
||||
<br>
|
||||
<br>
|
||||
There are no short forms for these options. The default settings are specified
|
||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||
when the PCRE2 library is compiled, with the default defaults being very large
|
||||
and so effectively unlimited.
|
||||
</P>
|
||||
<P>
|
||||
<b>--max-buffer-size</b>=<i>number</i>
|
||||
|
@ -764,11 +779,12 @@ Many of the short and long forms of <b>pcre2grep</b>'s options are the same
|
|||
as in the GNU <b>grep</b> program. Any long option of the form
|
||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
||||
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
||||
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
||||
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
|
||||
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
|
||||
<b>--only-matching</b> option with a capturing parentheses number.
|
||||
<b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
|
||||
<b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
|
||||
<b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
|
||||
<b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
|
||||
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
|
||||
capturing parentheses number.
|
||||
</P>
|
||||
<P>
|
||||
Although most of the common options work the same way, a few are different in
|
||||
|
@ -891,9 +907,9 @@ there are more than 20 such errors, <b>pcre2grep</b> gives up.
|
|||
</P>
|
||||
<P>
|
||||
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
|
||||
overall resource limit; there is a second option called <b>--depth-limit</b>
|
||||
that sets a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
overall resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of <b>--heap-limit</b> and
|
||||
<b>--depth-limit</b> above.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
|
||||
<P>
|
||||
|
@ -918,7 +934,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -170,14 +170,15 @@ the application to apply the JIT optimization by calling
|
|||
<b>pcre2_jit_compile()</b> is ignored.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting match and backtracking depth limits
|
||||
Setting match resource limits
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>pcre2_match()</b> function contains a counter that is incremented every time it
|
||||
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
|
||||
this counter, which therefore limits the amount of computing resource used for
|
||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
||||
restricts the amount of heap memory that is used.
|
||||
a match. The maximum depth of nested backtracking can also be limited; this
|
||||
indirectly restricts the amount of heap memory that is used, but there is also
|
||||
an explicit memory limit that can be set.
|
||||
</P>
|
||||
<P>
|
||||
These facilities are provided to catch runaway matches that are provoked by
|
||||
|
@ -186,6 +187,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
|||
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
|
||||
can also be set by items at the start of the pattern of the form
|
||||
<pre>
|
||||
(*LIMIT_HEAP=d)
|
||||
(*LIMIT_MATCH=d)
|
||||
(*LIMIT_DEPTH=d)
|
||||
</pre>
|
||||
|
@ -200,11 +202,13 @@ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
|||
still recognized for backwards compatibility.
|
||||
</P>
|
||||
<P>
|
||||
The match limit is used (but in a different way) when JIT is being used, but it
|
||||
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
||||
However, the depth limit is relevant for DFA matching, which uses function
|
||||
recursion for recursions within the pattern. In this case, the depth limit
|
||||
controls the amount of system stack that is used.
|
||||
The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
|
||||
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||
ignored, when matching with <b>pcre2_dfa_match()</b>. The depth limit is ignored
|
||||
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||
recursions within the pattern. In this case, the depth limit controls the
|
||||
amount of system stack that is used.
|
||||
<a name="newlines"></a></P>
|
||||
<br><b>
|
||||
Newline conventions
|
||||
|
@ -3434,7 +3438,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -83,11 +83,12 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
|
|||
uses very little system stack at run time. In earlier releases recursive
|
||||
function calls could use a great deal of stack, and this could cause problems,
|
||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
||||
frames is allocated on the system stack (enough for about 50 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
||||
to be time-efficient, as described below, may also reduce the memory
|
||||
requirements.
|
||||
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||
frames is allocated on the system stack (enough for about 100 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||
memory can be limited; if the limit is set to zero, only the initial stack
|
||||
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||
may also reduce the memory requirements.
|
||||
</P>
|
||||
<P>
|
||||
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
||||
|
@ -243,7 +244,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 08 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -235,6 +235,12 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
|
|||
compilation, each pattern is passed to the just-in-time compiler, if available.
|
||||
</P>
|
||||
<P>
|
||||
<b>-jitverify</b>
|
||||
Behave as if each pattern line has the <b>jitverify</b> modifier; after
|
||||
successful compilation, each pattern is passed to the just-in-time compiler, if
|
||||
available, and the use of JIT is verified.
|
||||
</P>
|
||||
<P>
|
||||
<b>-pattern</b> <i>modifier-list</i>
|
||||
Behave as if each pattern line contains the given modifiers.
|
||||
</P>
|
||||
|
@ -1088,6 +1094,7 @@ pattern.
|
|||
get=<number or name> extract captured substring
|
||||
getall extract all captured substrings
|
||||
/g global global matching
|
||||
heap_limit=<n> set a limit on heap memory
|
||||
jitstack=<n> set size of JIT stack
|
||||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
|
@ -1330,11 +1337,11 @@ stack that is larger than the default 32K is necessary only for very
|
|||
complicated patterns.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting match and depth limits
|
||||
Setting heap, match, and depth limits
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>match_limit</b> and <b>depth_limit</b> modifiers set the appropriate
|
||||
limits in the match context. These values are ignored when the
|
||||
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
|
||||
the appropriate limits in the match context. These values are ignored when the
|
||||
<b>find_limits</b> modifier is specified.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -1343,8 +1350,8 @@ Finding minimum limits
|
|||
<P>
|
||||
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
|
||||
calls the relevant matching function several times, setting different values in
|
||||
the match context via <b>pcre2_set_match_limit()</b> or
|
||||
<b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
||||
the match context via <b>pcre2_set_heap_limit()</b>, <b>pcre2_set_match_limit()</b>,
|
||||
or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
||||
parameter that allows the match to complete without error.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1360,8 +1367,8 @@ increasing length of subject string.
|
|||
</P>
|
||||
<P>
|
||||
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
|
||||
much memory for recording backtracking points is needed to complete the match
|
||||
attempt. In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
||||
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||
searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
recursion, lookaround assertions, and atomic groups.
|
||||
</P>
|
||||
|
@ -1800,7 +1807,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
|
|
160
doc/pcre2.txt
160
doc/pcre2.txt
|
@ -283,6 +283,9 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
|
|||
int pcre2_set_offset_limit(pcre2_match_context *mcontext,
|
||||
PCRE2_SIZE value);
|
||||
|
||||
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
|
||||
uint32_t value);
|
||||
|
||||
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
||||
uint32_t value);
|
||||
|
||||
|
@ -840,6 +843,7 @@ PCRE2 CONTEXTS
|
|||
|
||||
Set up a callout function
|
||||
Set an offset limit for matching an unanchored pattern
|
||||
Change the limit on the amount of heap used when matching
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
@ -896,14 +900,44 @@ PCRE2 CONTEXTS
|
|||
also within the offset limit. In other words, whichever limit comes
|
||||
first is used.
|
||||
|
||||
int pcre2_set_heap_limit(pcre2_match_context *mcontext,
|
||||
uint32_t value);
|
||||
|
||||
The heap_limit parameter specifies, in units of kilobytes, the maximum
|
||||
amount of heap memory that pcre2_match() may use to hold backtracking
|
||||
information when running an interpretive match. This limit does not
|
||||
apply to matching with the JIT optimization, which has its own memory
|
||||
control arrangements (see the pcre2jit documentation for more details),
|
||||
nor does it apply to pcre2_dfa_match(). If the limit is reached, the
|
||||
negative error code PCRE2_ERROR_HEAPLIMIT is returned. The default
|
||||
limit is set when PCRE2 is built; the default default is very large and
|
||||
is essentially "unlimited".
|
||||
|
||||
A value for the heap limit may also be supplied by an item at the start
|
||||
of a pattern of the form
|
||||
|
||||
(*LIMIT_HEAP=ddd)
|
||||
|
||||
where ddd is a decimal number. However, such a setting is ignored
|
||||
unless ddd is less than the limit set by the caller of pcre2_match()
|
||||
or, if no such limit is set, less than the default.
|
||||
|
||||
The pcre2_match() function starts out using a 20K vector on the system
|
||||
stack for recording backtracking points. The more nested backtracking
|
||||
points there are (that is, the deeper the search tree), the more memory
|
||||
is needed. Heap memory is used only if the initial vector is too
|
||||
small. If the heap limit is set to a value less than 21 (in particular,
|
||||
zero) no heap memory will be used. In this case, only patterns that do
|
||||
not have a lot of nested backtracking can be successfully processed.
|
||||
|
||||
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
||||
uint32_t value);
|
||||
|
||||
The match_limit parameter provides a means of preventing PCRE2 from
|
||||
using up too many resources when processing patterns that are not going
|
||||
to match, but which have a very large number of possibilities in their
|
||||
search trees. The classic example is a pattern that uses nested unlim-
|
||||
ited repeats.
|
||||
using up too many computing resources when processing patterns that are
|
||||
not going to match, but which have a very large number of possibilities
|
||||
in their search trees. The classic example is a pattern that uses
|
||||
nested unlimited repeats.
|
||||
|
||||
There is an internal counter in pcre2_match() that is incremented each
|
||||
time round its main matching loop. If this value reaches the match
|
||||
|
@ -938,15 +972,19 @@ PCRE2 CONTEXTS
|
|||
pcre2_match(). Each time a nested backtracking point is passed, a new
|
||||
memory "frame" is used to remember the state of matching at that point.
|
||||
Thus, this parameter indirectly limits the amount of memory that is
|
||||
used in a match.
|
||||
used in a match. However, because the size of each memory "frame"
|
||||
depends on the number of capturing parentheses, the actual memory limit
|
||||
varies from pattern to pattern. This limit was more useful in versions
|
||||
before 10.30, where function recursion was used for backtracking.
|
||||
|
||||
This limit is not relevant, and is ignored, when matching is done using
|
||||
JIT compiled code. However, it is supported by pcre2_dfa_match(), which
|
||||
uses it to limit the depth of internal recursive function calls that
|
||||
implement lookaround assertions and pattern recursions. This is, there-
|
||||
fore, an indirect limit on the amount of system stack that is used. A
|
||||
recursive pattern such as /(.)(?1)/, when matched to a very long string
|
||||
using pcre2_dfa_match(), can use a great deal of stack.
|
||||
The depth limit is not relevant, and is ignored, when matching is done
|
||||
using JIT compiled code. However, it is supported by pcre2_dfa_match(),
|
||||
which uses it to limit the depth of internal recursive function calls
|
||||
that implement atomic groups, lookaround assertions, and pattern recur-
|
||||
sions. This is, therefore, an indirect limit on the amount of system
|
||||
stack that is used. A recursive pattern such as /(.)(?1)/, when matched
|
||||
to a very long string using pcre2_dfa_match(), can use a great deal of
|
||||
stack.
|
||||
|
||||
The default value for the depth limit can be set when PCRE2 is built;
|
||||
the default default is the same value as the default for the match
|
||||
|
@ -999,6 +1037,12 @@ CHECKING BUILD-TIME OPTIONS
|
|||
recursions and lookarounds in pcre2_dfa_match(). Further details are
|
||||
given with pcre2_set_depth_limit() above.
|
||||
|
||||
PCRE2_CONFIG_HEAPLIMIT
|
||||
|
||||
The output is a uint32_t integer that gives, in kilobytes, the default
|
||||
limit for the amount of heap memory used by pcre2_match(). Further
|
||||
details are given with pcre2_set_heap_limit() above.
|
||||
|
||||
PCRE2_CONFIG_JIT
|
||||
|
||||
The output is a uint32_t integer that is set to one if support for
|
||||
|
@ -1803,6 +1847,14 @@ INFORMATION ABOUT A COMPILED PATTERN
|
|||
\r or \n or one of the equivalent hexadecimal or octal escape
|
||||
sequences.
|
||||
|
||||
PCRE2_INFO_HEAPLIMIT
|
||||
|
||||
If the pattern set a heap memory limit by including an item of the form
|
||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argu-
|
||||
ment should point to an unsigned 32-bit integer. If no such value has
|
||||
been set, the call to pcre2_pattern_info() returns the error
|
||||
PCRE2_ERROR_UNSET.
|
||||
|
||||
PCRE2_INFO_JCHANGED
|
||||
|
||||
Return 1 if the (?J) or (?-J) option setting is used in the pattern,
|
||||
|
@ -2517,7 +2569,9 @@ ERROR RETURNS FROM pcre2_match()
|
|||
|
||||
If a pattern contains many nested backtracking points, heap memory is
|
||||
used to remember them. This error is given when the memory allocation
|
||||
function (default or custom) fails.
|
||||
function (default or custom) fails. Note that a different error,
|
||||
PCRE2_ERROR_HEAPLIMIT, is given if the amount of memory needed exceeds
|
||||
the heap limit.
|
||||
|
||||
PCRE2_ERROR_NULL
|
||||
|
||||
|
@ -3187,7 +3241,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -3427,19 +3481,40 @@ LIMITING PCRE2 RESOURCE USAGE
|
|||
pcre2_dfa_match() matching function, but it does also limit JIT match-
|
||||
ing (though the counting is done differently).
|
||||
|
||||
In some environments it is desirable to limit the depth of nested back-
|
||||
tracking in order to restrict the maximum amount of heap memory that is
|
||||
used. A second limit controls this; it defaults to the value that is
|
||||
set for --with-match-limit. You can set a lower default limit by
|
||||
adding, for example,
|
||||
The pcre2_match() function starts out using a 20K vector on the system
|
||||
stack to record backtracking points. The more nested backtracking
|
||||
points there are (that is, the deeper the search tree), the more memory
|
||||
is needed. If the initial vector is not large enough, heap memory is
|
||||
used, up to a certain limit, which is specified in kilobytes. The limit
|
||||
can be changed at run time, as described in the pcre2api documentation.
|
||||
The default limit (in effect unlimited) is 20 million. You can change
|
||||
this by a setting such as
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
which limits the amount of heap to 500 kilobytes. This limit applies
|
||||
only to interpretive matching in pcre2_match(). It does not apply when
|
||||
JIT (which has its own memory arrangements) is used, nor does it apply
|
||||
to pcre2_dfa_match().
|
||||
|
||||
You can also explicitly limit the depth of nested backtracking in the
|
||||
pcre2_match() interpreter. This limit defaults to the value that is set
|
||||
for --with-match-limit. You can set a lower default limit by adding,
|
||||
for example,
|
||||
|
||||
--with-match-limit-depth=10000
|
||||
|
||||
to the configure command. This value can also be overridden at run
|
||||
time. As well as applying to pcre2_match(), this limit also controls
|
||||
the depth of recursive function calls in pcre2_dfa_match(). These are
|
||||
used for lookaround assertions, atomic groups, and recursion within
|
||||
patterns.
|
||||
to the configure command. This value can be overridden at run time.
|
||||
This depth limit indirectly limits the amount of heap memory that is
|
||||
used, but because the size of each backtracking "frame" depends on the
|
||||
number of capturing parentheses in a pattern, the amount of heap that
|
||||
is used before the limit is reached varies from pattern to pattern.
|
||||
This limit was more useful in versions before 10.30, where function
|
||||
recursion was used for backtracking. However, as well as applying to
|
||||
pcre2_match(), this limit also controls the depth of recursive function
|
||||
calls in pcre2_dfa_match(). These are used for lookaround assertions,
|
||||
atomic groups, and recursion within patterns. The limit does not apply
|
||||
to JIT matching.
|
||||
|
||||
|
||||
CREATING CHARACTER TABLES AT BUILD TIME
|
||||
|
@ -3701,7 +3776,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 10 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -5522,14 +5597,15 @@ SPECIAL START-OF-PATTERN ITEMS
|
|||
attempt by the application to apply the JIT optimization by calling
|
||||
pcre2_jit_compile() is ignored.
|
||||
|
||||
Setting match and backtracking depth limits
|
||||
Setting match resource limits
|
||||
|
||||
The pcre2_match() function contains a counter that is incremented every
|
||||
time it goes round its main loop. The caller of pcre2_match() can set a
|
||||
limit on this counter, which therefore limits the amount of computing
|
||||
resource used for a match. The maximum depth of nested backtracking can
|
||||
also be limited, and this restricts the amount of heap memory that is
|
||||
used.
|
||||
also be limited; this indirectly restricts the amount of heap memory
|
||||
that is used, but there is also an explicit memory limit that can be
|
||||
set.
|
||||
|
||||
These facilities are provided to catch runaway matches that are pro-
|
||||
voked by patterns with huge matching trees (a typical example is a pat-
|
||||
|
@ -5538,6 +5614,7 @@ SPECIAL START-OF-PATTERN ITEMS
|
|||
error return. The limits can also be set by items at the start of the
|
||||
pattern of the form
|
||||
|
||||
(*LIMIT_HEAP=d)
|
||||
(*LIMIT_MATCH=d)
|
||||
(*LIMIT_DEPTH=d)
|
||||
|
||||
|
@ -5551,12 +5628,13 @@ SPECIAL START-OF-PATTERN ITEMS
|
|||
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This
|
||||
name is still recognized for backwards compatibility.
|
||||
|
||||
The match limit is used (but in a different way) when JIT is being
|
||||
used, but it is not relevant, and is ignored, when matching with
|
||||
pcre2_dfa_match(). However, the depth limit is relevant for DFA match-
|
||||
ing, which uses function recursion for recursions within the pattern.
|
||||
In this case, the depth limit controls the amount of system stack that
|
||||
is used.
|
||||
The heap limit applies only when the pcre2_match() interpreter is used
|
||||
for matching. It does not apply to JIT or DFA matching. The match limit
|
||||
is used (but in a different way) when JIT is being used, but it is not
|
||||
relevant, and is ignored, when matching with pcre2_dfa_match(). The
|
||||
depth limit is ignored by JIT but is relevant for DFA matching, which
|
||||
uses function recursion for recursions within the pattern. In this
|
||||
case, the depth limit controls the amount of system stack that is used.
|
||||
|
||||
Newline conventions
|
||||
|
||||
|
@ -8480,7 +8558,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 03 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -8557,10 +8635,12 @@ STACK AND HEAP USAGE AT RUN TIME
|
|||
sive function calls could use a great deal of stack, and this could
|
||||
cause problems, but this usage has been eliminated. Backtracking posi-
|
||||
tions are now explicitly remembered in memory frames controlled by the
|
||||
code. An initial 10K vector of frames is allocated on the system stack
|
||||
(enough for about 50 frames for small patterns), but if this is insuf-
|
||||
ficient, heap memory is used. Rewriting patterns to be time-efficient,
|
||||
as described below, may also reduce the memory requirements.
|
||||
code. An initial 20K vector of frames is allocated on the system stack
|
||||
(enough for about 100 frames for small patterns), but if this is insuf-
|
||||
ficient, heap memory is used. The amount of heap memory can be limited;
|
||||
if the limit is set to zero, only the initial stack vector is used.
|
||||
Rewriting patterns to be time-efficient, as described below, may also
|
||||
reduce the memory requirements.
|
||||
|
||||
In contrast to pcre2_match(), pcre2_dfa_match() does use recursive
|
||||
function calls, but only for processing atomic groups, lookaround
|
||||
|
@ -8706,7 +8786,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 08 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_CONFIG 3 "24 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -31,6 +31,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_CONFIG_BSR Indicates what \eR matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||
.\" JOIN
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_MATCH 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -32,6 +32,7 @@ A match context is needed only if you want to:
|
|||
.sp
|
||||
Set up a callout function
|
||||
Set a matching offset limit
|
||||
Change the heap memory limit
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_PATTERN_INFO 3 "25 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -43,6 +43,9 @@ request are as follows:
|
|||
.\" JOIN
|
||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
|
||||
exist in the pattern
|
||||
.\" JOIN
|
||||
PCRE2_INFO_HEAPLIMIT Heap memory limit if set,
|
||||
otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function sets the backtracking heap limit field in a match context. The
|
||||
result is always zero.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2API 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -123,6 +123,9 @@ document for an overview of all the PCRE2 documentation.
|
|||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
|
@ -753,6 +756,7 @@ A match context is required if you want to:
|
|||
.sp
|
||||
Set up a callout function
|
||||
Set an offset limit for matching an unanchored pattern
|
||||
Change the limit on the amount of heap used when matching
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
@ -816,14 +820,49 @@ limit, a match must occur in the first line and also within the offset limit.
|
|||
In other words, whichever limit comes first is used.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
|
||||
amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
|
||||
information when running an interpretive match. This limit does not apply to
|
||||
matching with the JIT optimization, which has its own memory control
|
||||
arrangements (see the
|
||||
.\" HREF
|
||||
\fBpcre2jit\fP
|
||||
.\"
|
||||
documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
|
||||
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||
returned. The default limit is set when PCRE2 is built; the default default is
|
||||
very large and is essentially "unlimited".
|
||||
.P
|
||||
A value for the heap limit may also be supplied by an item at the start of a
|
||||
pattern of the form
|
||||
.sp
|
||||
(*LIMIT_HEAP=ddd)
|
||||
.sp
|
||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
|
||||
limit is set, less than the default.
|
||||
.P
|
||||
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||
stack for recording backtracking points. The more nested backtracking points
|
||||
there are (that is, the deeper the search tree), the more memory is needed.
|
||||
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||
can be successfully processed.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
|
||||
up too many resources when processing patterns that are not going to match, but
|
||||
which have a very large number of possibilities in their search trees. The
|
||||
classic example is a pattern that uses nested unlimited repeats.
|
||||
up too many computing resources when processing patterns that are not going to
|
||||
match, but which have a very large number of possibilities in their search
|
||||
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||
.P
|
||||
There is an internal counter in \fBpcre2_match()\fP that is incremented each
|
||||
time round its main matching loop. If this value reaches the match limit,
|
||||
|
@ -859,15 +898,19 @@ limit is set, less than the default.
|
|||
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
|
||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||
to remember the state of matching at that point. Thus, this parameter
|
||||
indirectly limits the amount of memory that is used in a match.
|
||||
indirectly limits the amount of memory that is used in a match. However,
|
||||
because the size of each memory "frame" depends on the number of capturing
|
||||
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||
was more useful in versions before 10.30, where function recursion was used for
|
||||
backtracking.
|
||||
.P
|
||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
||||
compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which uses
|
||||
it to limit the depth of internal recursive function calls that implement
|
||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
||||
limit on the amount of system stack that is used. A recursive pattern such as
|
||||
/(.)(?1)/, when matched to a very long string using \fBpcre2_dfa_match()\fP,
|
||||
can use a great deal of stack.
|
||||
The depth limit is not relevant, and is ignored, when matching is done using
|
||||
JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
|
||||
uses it to limit the depth of internal recursive function calls that implement
|
||||
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||
therefore, an indirect limit on the amount of system stack that is used. A
|
||||
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||
\fBpcre2_dfa_match()\fP, can use a great deal of stack.
|
||||
.P
|
||||
The default value for the depth limit can be set when PCRE2 is built; the
|
||||
default default is the same value as the default for the match limit. If the
|
||||
|
@ -921,6 +964,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
|||
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
|
||||
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
|
||||
\fBpcre2_set_depth_limit()\fP above.
|
||||
.sp
|
||||
PCRE2_CONFIG_HEAPLIMIT
|
||||
.sp
|
||||
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||
for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
|
||||
given with \fBpcre2_set_heap_limit()\fP above.
|
||||
.sp
|
||||
PCRE2_CONFIG_JIT
|
||||
.sp
|
||||
|
@ -1784,6 +1833,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
|||
otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An
|
||||
explicit match is either a literal CR or LF character, or \er or \en or one of
|
||||
the equivalent hexadecimal or octal escape sequences.
|
||||
.sp
|
||||
PCRE2_INFO_HEAPLIMIT
|
||||
.sp
|
||||
If the pattern set a heap memory limit by including an item of the form
|
||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||
call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET.
|
||||
.sp
|
||||
PCRE2_INFO_JCHANGED
|
||||
.sp
|
||||
|
@ -2603,7 +2659,8 @@ The backtracking match limit was reached.
|
|||
.sp
|
||||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails.
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit.
|
||||
.sp
|
||||
PCRE2_ERROR_NULL
|
||||
.sp
|
||||
|
@ -3322,6 +3379,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2BUILD 3 "31 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2BUILD 3 "10 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.
|
||||
|
@ -260,17 +260,42 @@ to the \fBconfigure\fP command. This setting has no effect on the
|
|||
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
|
||||
(though the counting is done differently).
|
||||
.P
|
||||
In some environments it is desirable to limit the depth of nested backtracking
|
||||
in order to restrict the maximum amount of heap memory that is used. A second
|
||||
limit controls this; it defaults to the value that is set for
|
||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
||||
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||
stack to record backtracking points. The more nested backtracking points there
|
||||
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||
which is specified in kilobytes. The limit can be changed at run time, as
|
||||
described in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation. The default limit (in effect unlimited) is 20 million kilobytes. You can
|
||||
change this by a setting such as
|
||||
.sp
|
||||
--with-heap-limit=500
|
||||
.sp
|
||||
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||
interpretive matching in \fBpcre2_match()\fP. It does not apply when JIT (which has
|
||||
its own memory arrangements) is used, nor does it apply to
|
||||
\fBpcre2_dfa_match()\fP.
|
||||
.P
|
||||
You can also explicitly limit the depth of nested backtracking in the
|
||||
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
|
||||
for --with-match-limit. You can set a lower default limit by adding, for
|
||||
example,
|
||||
.sp
|
||||
  --with-match-limit-depth=10000
|
||||
.sp
|
||||
to the \fBconfigure\fP command. This value can also be overridden at run time.
|
||||
As well as applying to \fBpcre2_match()\fP, this limit also controls the depth
|
||||
of recursive function calls in \fBpcre2_dfa_match()\fP. These are used for
|
||||
lookaround assertions, atomic groups, and recursion within patterns.
|
||||
to the \fBconfigure\fP command. This value can be overridden at run time. This
|
||||
depth limit indirectly limits the amount of heap memory that is used, but
|
||||
because the size of each backtracking "frame" depends on the number of
|
||||
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||
versions before 10.30, where function recursion was used for backtracking.
|
||||
However, as well as applying to \fBpcre2_match()\fP, this limit also controls
|
||||
the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are
|
||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||
The limit does not apply to JIT matching.
|
||||
.
|
||||
.
|
||||
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
||||
|
@ -547,6 +572,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 10 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -347,6 +347,9 @@ file names are shown when multiple files are searched. For matching lines, the
|
|||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
||||
If a line number is also being output, it follows the file name.
|
||||
.TP
|
||||
\fB--heap-limit\fP=\fInumber\fP
|
||||
See \fB--match-limit\fP below.
|
||||
.TP
|
||||
\fB--help\fP
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit. Anything else on the command line is
|
||||
|
@ -436,7 +439,7 @@ used. There is no short form for this option.
|
|||
\fB--match-limit\fP=\fInumber\fP
|
||||
Processing some regular expression patterns may take a very long time to search
|
||||
for all possible matching strings. Others may require a very large amount of
|
||||
memory. There are two options that set resource limits for matching.
|
||||
memory. There are three options that set resource limits for matching.
|
||||
.sp
|
||||
The \fB--match-limit\fP option provides a means of limiting computing resource
|
||||
usage when processing patterns that are not going to match, but which have a
|
||||
|
@ -445,12 +448,22 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
|
|||
counter that is incremented each time around its main processing loop. If the
|
||||
value set by \fB--match-limit\fP is reached, an error occurs.
|
||||
.sp
|
||||
The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
|
||||
of heap memory that may be used for matching. Heap memory is needed only if
|
||||
matching the pattern requires a significant number of nested backtracking
|
||||
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
.sp
|
||||
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
|
||||
which in turn limits the amount of memory that is used. This limit is of use
|
||||
only if it is set smaller than \fB--match-limit\fP.
|
||||
which indirectly limits the amount of memory that is used. The amount of memory
|
||||
needed for each backtracking point depends on the number of capturing
|
||||
parentheses in the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||
set smaller than \fB--match-limit\fP.
|
||||
.sp
|
||||
There are no short forms for these options. The default settings are specified
|
||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||
when the PCRE2 library is compiled, with the default defaults being very large
|
||||
and so effectively unlimited.
|
||||
.TP
|
||||
\fB--max-buffer-size=\fInumber\fP
|
||||
This limits the expansion of the processing buffer, whose initial size can be
|
||||
|
@ -670,11 +683,12 @@ Many of the short and long forms of \fBpcre2grep\fP's options are the same
|
|||
as in the GNU \fBgrep\fP program. Any long option of the form
|
||||
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
|
||||
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
||||
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
||||
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
||||
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
|
||||
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
|
||||
\fB--only-matching\fP option with a capturing parentheses number.
|
||||
\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
|
||||
\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
|
||||
\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
|
||||
\fB--output\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
|
||||
\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
|
||||
capturing parentheses number.
|
||||
.P
|
||||
Although most of the common options work the same way, a few are different in
|
||||
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
||||
|
@ -799,9 +813,9 @@ message and the line that caused the problem to the standard error stream. If
|
|||
there are more than 20 such errors, \fBpcre2grep\fP gives up.
|
||||
.P
|
||||
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
|
||||
overall resource limit; there is a second option called \fB--depth-limit\fP
|
||||
that sets a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
overall resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of \fB--heap-limit\fP and
|
||||
\fB--depth-limit\fP above.
|
||||
.
|
||||
.
|
||||
.SH DIAGNOSTICS
|
||||
|
@ -834,6 +848,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -383,6 +383,9 @@ OPTIONS
|
|||
colon; for context lines, a hyphen separator is used. If a
|
||||
line number is also being output, it follows the file name.
|
||||
|
||||
--heap-limit=number
|
||||
See --match-limit below.
|
||||
|
||||
--help Output a help message, giving brief details of the command
|
||||
options and file type support, and then exit. Anything else
|
||||
on the command line is ignored.
|
||||
|
@ -482,7 +485,7 @@ OPTIONS
|
|||
--match-limit=number
|
||||
Processing some regular expression patterns may take a very
|
||||
long time to search for all possible matching strings. Others
|
||||
may require a very large amount of memory. There are two
|
||||
may require a very large amount of memory. There are three
|
||||
options that set resource limits for matching.
|
||||
|
||||
The --match-limit option provides a means of limiting comput-
|
||||
|
@ -494,14 +497,25 @@ OPTIONS
|
|||
processing loop. If the value set by --match-limit is
|
||||
reached, an error occurs.
|
||||
|
||||
The --heap-limit option specifies, as a number of kilobytes,
|
||||
the amount of heap memory that may be used for matching. Heap
|
||||
memory is needed only if matching the pattern requires a sig-
|
||||
nificant number of nested backtracking points to be remem-
|
||||
bered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
|
||||
The --depth-limit option limits the depth of nested back-
|
||||
tracking points, which in turn limits the amount of memory
|
||||
that is used. This limit is of use only if it is set smaller
|
||||
than --match-limit.
|
||||
tracking points, which indirectly limits the amount of memory
|
||||
that is used. The amount of memory needed for each backtrack-
|
||||
ing point depends on the number of capturing parentheses in
|
||||
the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of
|
||||
use only if it is set smaller than --match-limit.
|
||||
|
||||
There are no short forms for these options. The default set-
|
||||
tings are specified when the PCRE2 library is compiled, with
|
||||
the default default being 10 million.
|
||||
the default defaults being very large and so effectively
|
||||
unlimited.
|
||||
|
||||
--max-buffer-size=number
|
||||
This limits the expansion of the processing buffer, whose
|
||||
|
@ -748,11 +762,11 @@ OPTIONS COMPATIBILITY
|
|||
Many of the short and long forms of pcre2grep's options are the same as
|
||||
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
|
||||
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
||||
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
||||
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
||||
line, --om-separator, --output, -u, and --utf-8 options are specific to
|
||||
pcre2grep, as is the use of the --only-matching option with a capturing
|
||||
parentheses number.
|
||||
ever, the --depth-limit, --file-list, --file-offsets, --heap-limit,
|
||||
--include-dir, --line-offsets, --locale, --match-limit, -M, --multi-
|
||||
line, -N, --newline, --om-separator, --output, -u, and --utf-8 options
|
||||
are specific to pcre2grep, as is the use of the --only-matching option
|
||||
with a capturing parentheses number.
|
||||
|
||||
Although most of the common options work the same way, a few are dif-
|
||||
ferent in pcre2grep. For example, the --include option's argument is a
|
||||
|
@ -873,9 +887,9 @@ MATCHING ERRORS
|
|||
such errors, pcre2grep gives up.
|
||||
|
||||
The --match-limit option of pcre2grep can be used to set the overall
|
||||
resource limit; there is a second option called --depth-limit that sets
|
||||
a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of --heap-limit and
|
||||
--depth-limit above.
|
||||
|
||||
|
||||
DIAGNOSTICS
|
||||
|
@ -901,5 +915,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PATTERN 3 "03 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2PATTERN 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||
|
@ -138,14 +138,15 @@ the application to apply the JIT optimization by calling
|
|||
\fBpcre2_jit_compile()\fP is ignored.
|
||||
.
|
||||
.
|
||||
.SS "Setting match and backtracking depth limits"
|
||||
.SS "Setting match resource limits"
|
||||
.rs
|
||||
.sp
|
||||
The \fBpcre2_match()\fP function contains a counter that is incremented every time it
|
||||
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
|
||||
this counter, which therefore limits the amount of computing resource used for
|
||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
||||
restricts the amount of heap memory that is used.
|
||||
a match. The maximum depth of nested backtracking can also be limited; this
|
||||
indirectly restricts the amount of heap memory that is used, but there is also
|
||||
an explicit memory limit that can be set.
|
||||
.P
|
||||
These facilities are provided to catch runaway matches that are provoked by
|
||||
patterns with huge matching trees (a typical example is a pattern with nested
|
||||
|
@ -153,6 +154,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
|||
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
|
||||
can also be set by items at the start of the pattern of the form
|
||||
.sp
|
||||
(*LIMIT_HEAP=d)
|
||||
(*LIMIT_MATCH=d)
|
||||
(*LIMIT_DEPTH=d)
|
||||
.sp
|
||||
|
@ -165,11 +167,13 @@ setting of one of these limits, the lower value is used.
|
|||
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
||||
still recognized for backwards compatibility.
|
||||
.P
|
||||
The match limit is used (but in a different way) when JIT is being used, but it
|
||||
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
|
||||
However, the depth limit is relevant for DFA matching, which uses function
|
||||
recursion for recursions within the pattern. In this case, the depth limit
|
||||
controls the amount of system stack that is used.
|
||||
The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
|
||||
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||
ignored, when matching with \fBpcre2_dfa_match()\fP. The depth limit is ignored
|
||||
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||
recursions within the pattern. In this case, the depth limit controls the
|
||||
amount of system stack that is used.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="newlines"></a>
|
||||
|
@ -3465,6 +3469,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PERFORM 3 "31 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 PERFORMANCE"
|
||||
|
@ -69,11 +69,12 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
|
|||
uses very little system stack at run time. In earlier releases recursive
|
||||
function calls could use a great deal of stack, and this could cause problems,
|
||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
||||
frames is allocated on the system stack (enough for about 50 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
||||
to be time-efficient, as described below, may also reduce the memory
|
||||
requirements.
|
||||
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||
frames is allocated on the system stack (enough for about 100 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||
memory can be limited; if the limit is set to zero, only the initial stack
|
||||
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||
may also reduce the memory requirements.
|
||||
.P
|
||||
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
|
||||
function calls, but only for processing atomic groups, lookaround assertions,
|
||||
|
@ -231,6 +232,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 08 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "08 April 2017" "PCRE 10.30"
|
||||
.TH PCRE2TEST 1 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -1063,6 +1063,7 @@ pattern.
|
|||
get=<number or name> extract captured substring
|
||||
getall extract all captured substrings
|
||||
/g global global matching
|
||||
heap_limit=<n> set a limit on heap memory
|
||||
jitstack=<n> set size of JIT stack
|
||||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
|
@ -1293,11 +1294,11 @@ stack that is larger than the default 32K is necessary only for very
|
|||
complicated patterns.
|
||||
.
|
||||
.
|
||||
.SS "Setting match and depth limits"
|
||||
.SS "Setting heap, match, and depth limits"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmatch_limit\fP and \fBdepth_limit\fP modifiers set the appropriate
|
||||
limits in the match context. These values are ignored when the
|
||||
The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
|
||||
the appropriate limits in the match context. These values are ignored when the
|
||||
\fBfind_limits\fP modifier is specified.
|
||||
.
|
||||
.
|
||||
|
@ -1306,8 +1307,8 @@ limits in the match context. These values are ignored when the
|
|||
.sp
|
||||
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
|
||||
calls the relevant matching function several times, setting different values in
|
||||
the match context via \fBpcre2_set_match_limit()\fP or
|
||||
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||
the match context via \fBpcre2_set_heap_limit()\fP, \fBpcre2_set_match_limit()\fP,
|
||||
or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||
parameter that allows the match to complete without error.
|
||||
.P
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
|
@ -1320,8 +1321,8 @@ numbers of matching possibilities, it can become large very quickly with
|
|||
increasing length of subject string.
|
||||
.P
|
||||
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
||||
much memory for recording backtracking points is needed to complete the match
|
||||
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||
searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
recursion, lookaround assertions, and atomic groups.
|
||||
.
|
||||
|
@ -1782,6 +1783,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -185,6 +185,12 @@ COMMAND LINE OPTIONS
|
|||
successful compilation, each pattern is passed to the just-
|
||||
in-time compiler, if available.
|
||||
|
||||
-jitverify
|
||||
Behave as if each pattern line has the jitverify modifier;
|
||||
after successful compilation, each pattern is passed to the
|
||||
just-in-time compiler, if available, and the use of JIT is
|
||||
verified.
|
||||
|
||||
-pattern modifier-list
|
||||
Behave as if each pattern line contains the given modifiers.
|
||||
|
||||
|
@ -972,6 +978,7 @@ SUBJECT MODIFIERS
|
|||
get=<number or name> extract captured substring
|
||||
getall extract all captured substrings
|
||||
/g global global matching
|
||||
heap_limit=<n> set a limit on heap memory
|
||||
jitstack=<n> set size of JIT stack
|
||||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
|
@ -1196,19 +1203,20 @@ SUBJECT MODIFIERS
|
|||
Providing a stack that is larger than the default 32K is necessary only
|
||||
for very complicated patterns.
|
||||
|
||||
Setting match and depth limits
|
||||
Setting heap, match, and depth limits
|
||||
|
||||
The match_limit and depth_limit modifiers set the appropriate limits in
|
||||
the match context. These values are ignored when the find_limits modi-
|
||||
fier is specified.
|
||||
The heap_limit, match_limit, and depth_limit modifiers set the appro-
|
||||
priate limits in the match context. These values are ignored when the
|
||||
find_limits modifier is specified.
|
||||
|
||||
Finding minimum limits
|
||||
|
||||
If the find_limits modifier is present on a subject line, pcre2test
|
||||
calls the relevant matching function several times, setting different
|
||||
values in the match context via pcre2_set_match_limit() or
|
||||
pcre2_set_depth_limit() until it finds the minimum values for each
|
||||
parameter that allows the match to complete without error.
|
||||
values in the match context via pcre2_set_heap_limit(),
|
||||
pcre2_set_match_limit(), or pcre2_set_depth_limit() until it finds the
|
||||
minimum values for each parameter that allows the match to complete
|
||||
without error.
|
||||
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching
|
||||
is being used, only the depth limit is relevant.
|
||||
|
@ -1220,8 +1228,8 @@ SUBJECT MODIFIERS
|
|||
quickly with increasing length of subject string.
|
||||
|
||||
For non-DFA matching, the minimum depth_limit number is a measure of
|
||||
how much memory for recording backtracking points is needed to complete
|
||||
the match attempt. In the case of DFA matching, depth_limit controls
|
||||
how much nested backtracking happens (that is, how deeply the pattern's
|
||||
tree is searched). In the case of DFA matching, depth_limit controls
|
||||
the depth of recursive calls of the internal function that is used for
|
||||
handling pattern recursion, lookaround assertions, and atomic groups.
|
||||
|
||||
|
@ -1632,5 +1640,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -132,6 +132,10 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* This limits the amount of memory that pcre2_match() may use while matching
|
||||
a pattern. The value is in kilobytes. */
|
||||
#undef HEAP_LIMIT
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
|
@ -143,7 +147,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can record a backtrack position during a single
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
|
|
|
@ -727,6 +727,7 @@ enum { PSO_OPT, /* Value is an option bit */
|
|||
PSO_FLG, /* Value is a flag bit */
|
||||
PSO_NL, /* Value is a newline type */
|
||||
PSO_BSR, /* Value is a \R type */
|
||||
PSO_LIMH, /* Read integer value for heap limit */
|
||||
PSO_LIMM, /* Read integer value for match limit */
|
||||
PSO_LIMD }; /* Read integer value for depth limit */
|
||||
|
||||
|
@ -749,6 +750,7 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
|
||||
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
|
||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||
{ (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
|
||||
|
@ -8853,6 +8855,7 @@ uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
|||
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||
|
||||
uint32_t skipatstart; /* When checking (*UTF) etc */
|
||||
uint32_t limit_heap = UINT32_MAX;
|
||||
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
||||
uint32_t limit_depth = UINT32_MAX;
|
||||
|
||||
|
@ -9026,6 +9029,7 @@ while (patlen - skipatstart >= 2 &&
|
|||
|
||||
case PSO_LIMM:
|
||||
case PSO_LIMD:
|
||||
case PSO_LIMH:
|
||||
c = 0;
|
||||
pp = skipatstart;
|
||||
if (!IS_DIGIT(ptr[pp]))
|
||||
|
@ -9045,7 +9049,8 @@ while (patlen - skipatstart >= 2 &&
|
|||
ptr += pp;
|
||||
goto HAD_EARLY_ERROR;
|
||||
}
|
||||
if (p->type == PSO_LIMM) limit_match = c;
|
||||
if (p->type == PSO_LIMH) limit_heap = c;
|
||||
else if (p->type == PSO_LIMM) limit_match = c;
|
||||
else limit_depth = c;
|
||||
skipatstart += pp - skipatstart;
|
||||
break;
|
||||
|
@ -9288,6 +9293,7 @@ re->magic_number = MAGIC_NUMBER;
|
|||
re->compile_options = options;
|
||||
re->overall_options = cb.external_options;
|
||||
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
|
||||
re->limit_heap = limit_heap;
|
||||
re->limit_match = limit_match;
|
||||
re->limit_depth = limit_depth;
|
||||
re->first_codeunit = 0;
|
||||
|
|
|
@ -84,6 +84,7 @@ if (where == NULL) /* Requests a length */
|
|||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
|
@ -116,6 +117,10 @@ switch (what)
|
|||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
|
|
|
@ -168,6 +168,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
|||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
|
@ -346,6 +347,13 @@ mcontext->callout_data = callout_data;
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Set the heap limit in a match context. The value is in kilobytes: the
matcher compares it with byte sizes divided by 1024. Always returns 0. */
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
|
|
|
@ -256,6 +256,7 @@ static const unsigned char match_error_texts[] =
|
|||
"match with end before start is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -240,6 +240,16 @@ not rely on this. */
|
|||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
/* The initial frames vector for remembering backtracking points in
|
||||
pcre2_match() is allocated on the system stack, of this size (bytes). The size
|
||||
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
|
||||
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
|
||||
on the number of capturing parentheses) so 20K handles quite a few frames. A
|
||||
larger vector on the heap is obtained for patterns that need more frames. The
|
||||
maximum size of this can be limited. */
|
||||
|
||||
#define START_FRAMES_SIZE 20480
|
||||
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#ifdef BSR_ANYCRLF
|
||||
|
@ -922,6 +932,7 @@ a positive value. */
|
|||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
|
@ -1196,6 +1207,7 @@ only. */
|
|||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
|
|
|
@ -585,6 +585,7 @@ typedef struct pcre2_real_match_context {
|
|||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t heap_limit;
|
||||
uint32_t match_limit;
|
||||
uint32_t depth_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
@ -614,6 +615,7 @@ typedef struct pcre2_real_code {
|
|||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
uint32_t limit_depth; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
|
@ -808,9 +810,10 @@ typedef struct match_block {
|
|||
heapframe *match_frames; /* Points to vector of frames */
|
||||
heapframe *match_frames_top; /* Points after the end of the vector */
|
||||
heapframe *stack_frames; /* The original vector on the stack */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
PCRE2_SIZE heap_limit; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
|
|
|
@ -64,15 +64,6 @@ information, and fields within it. */
|
|||
|
||||
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
|
||||
|
||||
/* The initial frames vector for remembering backtracking points is allocated
|
||||
on the system stack, of this size (bytes). The size must be a multiple of
|
||||
sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best.
|
||||
Typical frame sizes are a few hundred bytes (it depends on the number of
|
||||
capturing parentheses) so 10K handles quite a few frames. A larger vector on
|
||||
the heap is obtained for patterns that need more frames. */
|
||||
|
||||
#define START_FRAMES_SIZE 10240
|
||||
|
||||
/* Masks for identifying the public options that are permitted at match time. */
|
||||
|
||||
#define PUBLIC_MATCH_OPTIONS \
|
||||
|
@ -618,14 +609,22 @@ backtracking point. */
|
|||
MATCH_RECURSE:
|
||||
|
||||
/* Set up a new backtracking frame. If the vector is full, get a new one
|
||||
on the heap, doubling the size. */
|
||||
on the heap, doubling the size, but constrained by the heap limit. */
|
||||
|
||||
N = (heapframe *)((char *)F + frame_size);
|
||||
if (N >= mb->match_frames_top)
|
||||
{
|
||||
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
|
||||
heapframe *new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
||||
heapframe *new;
|
||||
|
||||
/* Cap the doubled size at the heap limit (kilobytes), rounded down to a whole
number of frames. If the current vector is already at or above that cap (the
initial vector can be larger than a small heap limit allows), fail rather than
allocate a smaller vector: the memcpy() that follows copies frame_vector_size
bytes into the new vector and would overrun it. */
if ((newsize / 1024) > mb->heap_limit)
|
||||
{
|
||||
PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||
if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
|
||||
newsize = maxsize;
|
||||
}
|
||||
|
||||
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(new, mb->match_frames, mb->frame_vector_size);
|
||||
|
||||
|
@ -6266,9 +6265,22 @@ correct when calling match() more than once for non-anchored patterns. */
|
|||
|
||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
|
||||
mcontext->heap_limit : re->limit_heap;
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
|
||||
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
||||
mcontext->depth_limit : re->limit_depth;
|
||||
|
||||
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||
large. Ensure that there are at least 10 available frames by getting an initial
|
||||
vector on the heap if necessary. */
|
||||
vector on the heap if necessary, except when the heap limit prevents this. Get
|
||||
fewer if possible. (The heap limit is in kilobytes.) */
|
||||
|
||||
if (frame_size <= START_FRAMES_SIZE/10)
|
||||
{
|
||||
|
@ -6278,6 +6290,11 @@ if (frame_size <= START_FRAMES_SIZE/10)
|
|||
else
|
||||
{
|
||||
mb->frame_vector_size = frame_size * 10;
|
||||
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
|
||||
{
|
||||
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
|
||||
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||
}
|
||||
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||
mb->memctl.memory_data);
|
||||
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
@ -6292,14 +6309,6 @@ to avoid uninitialized memory read errors when it is copied to a new frame. */
|
|||
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
||||
mcontext->depth_limit : re->limit_depth;
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
mb->lcc = re->tables + lcc_offset;
|
||||
|
|
|
@ -80,6 +80,7 @@ if (where == NULL) /* Requests field length */
|
|||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
|
@ -171,6 +172,11 @@ switch(what)
|
|||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
|
|
@ -212,6 +212,7 @@ static const uint8_t *character_tables = NULL;
|
|||
|
||||
static uint32_t pcre2_options = 0;
|
||||
static uint32_t process_options = 0;
|
||||
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
|
||||
static uint32_t match_limit = 0;
|
||||
static uint32_t depth_limit = 0;
|
||||
|
||||
|
@ -330,7 +331,7 @@ static const char *incexname[4] = { "--include", "--exclude",
|
|||
|
||||
/* Structure for options and list of them */
|
||||
|
||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
|
||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
|
||||
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
|
||||
|
||||
typedef struct option_item {
|
||||
|
@ -356,16 +357,17 @@ used to identify them. */
|
|||
#define N_LOFFSETS (-10)
|
||||
#define N_FOFFSETS (-11)
|
||||
#define N_LBUFFER (-12)
|
||||
#define N_M_LIMIT (-13)
|
||||
#define N_M_LIMIT_DEP (-14)
|
||||
#define N_BUFSIZE (-15)
|
||||
#define N_NOJIT (-16)
|
||||
#define N_FILE_LIST (-17)
|
||||
#define N_BINARY_FILES (-18)
|
||||
#define N_EXCLUDE_FROM (-19)
|
||||
#define N_INCLUDE_FROM (-20)
|
||||
#define N_OM_SEPARATOR (-21)
|
||||
#define N_MAX_BUFSIZE (-22)
|
||||
#define N_H_LIMIT (-13)
|
||||
#define N_M_LIMIT (-14)
|
||||
#define N_M_LIMIT_DEP (-15)
|
||||
#define N_BUFSIZE (-16)
|
||||
#define N_NOJIT (-17)
|
||||
#define N_FILE_LIST (-18)
|
||||
#define N_BINARY_FILES (-19)
|
||||
#define N_EXCLUDE_FROM (-20)
|
||||
#define N_INCLUDE_FROM (-21)
|
||||
#define N_OM_SEPARATOR (-22)
|
||||
#define N_MAX_BUFSIZE (-23)
|
||||
|
||||
static option_item optionlist[] = {
|
||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||
|
@ -397,6 +399,7 @@ static option_item optionlist[] = {
|
|||
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
|
||||
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
|
||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
||||
|
@ -525,9 +528,9 @@ pcre2grep_exit(int rc)
|
|||
{
|
||||
if (resource_error)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_DEPTHLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
|
||||
"limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||
}
|
||||
exit(rc);
|
||||
|
@ -1647,7 +1650,7 @@ for (i = 1; p != NULL; p = p->next, i++)
|
|||
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
|
||||
fprintf(stderr, "\n\n");
|
||||
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
|
||||
*mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||
*mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||
resource_error = TRUE;
|
||||
if (error_count++ > 20)
|
||||
{
|
||||
|
@ -3796,7 +3799,7 @@ for (i = 1; i < argc; i++)
|
|||
/* Otherwise, deal with a single string or numeric data value. */
|
||||
|
||||
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
|
||||
op->type != OP_OP_NUMBER)
|
||||
op->type != OP_OP_NUMBER && op->type != OP_SIZE)
|
||||
{
|
||||
*((char **)op->dataptr) = option_data;
|
||||
}
|
||||
|
@ -3804,6 +3807,7 @@ for (i = 1; i < argc; i++)
|
|||
{
|
||||
unsigned long int n = decode_number(option_data, op, longop);
|
||||
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
|
||||
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
|
||||
else *((int *)op->dataptr) = n;
|
||||
}
|
||||
}
|
||||
|
@ -3839,6 +3843,7 @@ if (output_text != NULL &&
|
|||
|
||||
/* Put limits into the match context. */
|
||||
|
||||
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||
|
||||
|
|
|
@ -588,6 +588,7 @@ static modstruct modlist[] = {
|
|||
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
||||
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
|
||||
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
|
||||
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
|
||||
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
|
||||
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
||||
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
|
||||
|
@ -1207,6 +1208,14 @@ are supported. */
|
|||
else \
|
||||
pcre2_set_depth_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_heap_limit_8(G(a,8),b); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_heap_limit_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_heap_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_match_limit_8(G(a,8),b); \
|
||||
|
@ -1643,6 +1652,12 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
|
||||
else \
|
||||
G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
|
||||
|
@ -1856,6 +1871,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||
|
@ -1952,6 +1968,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||
|
@ -2048,6 +2065,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||
|
@ -4040,14 +4058,28 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
{
|
||||
void *nametable;
|
||||
uint8_t *start_bits;
|
||||
BOOL match_limit_set, depth_limit_set;
|
||||
BOOL heap_limit_set, match_limit_set, depth_limit_set;
|
||||
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
||||
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
|
||||
match_limit, minlength, nameentrysize, namecount, newline_convention,
|
||||
depth_limit;
|
||||
depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
|
||||
newline_convention;
|
||||
|
||||
/* These info requests may return PCRE2_ERROR_UNSET. */
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
heap_limit_set = TRUE;
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_UNSET:
|
||||
heap_limit_set = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
|
@ -4106,6 +4138,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (maxlookbehind > 0)
|
||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||
|
||||
if (heap_limit_set)
|
||||
fprintf(outfile, "Heap limit = %u\n", heap_limit);
|
||||
|
||||
if (match_limit_set)
|
||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||
|
||||
|
@ -5353,10 +5388,15 @@ uint32_t max = UINT32_MAX;
|
|||
|
||||
PCRE2_SET_MATCH_LIMIT(dat_context, max);
|
||||
PCRE2_SET_DEPTH_LIMIT(dat_context, max);
|
||||
PCRE2_SET_HEAP_LIMIT(dat_context, max);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
||||
if (errnumber == PCRE2_ERROR_HEAPLIMIT)
|
||||
{
|
||||
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
|
||||
}
|
||||
else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
||||
{
|
||||
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
|
||||
}
|
||||
|
@ -5393,13 +5433,23 @@ for (;;)
|
|||
capcount == PCRE2_ERROR_NOMATCH ||
|
||||
capcount == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
/* If we've not hit the error with a heap limit less than the size of the
|
||||
initial stack frame vector, the heap is not being used, so the minimum
|
||||
limit is zero; there's no need to go on. The other limits are always
|
||||
greater than zero. */
|
||||
|
||||
if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
|
||||
{
|
||||
fprintf(outfile, "Minimum %s limit = 0\n", msg);
|
||||
break;
|
||||
}
|
||||
if (mid == min + 1)
|
||||
{
|
||||
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
||||
break;
|
||||
}
|
||||
max = mid;
|
||||
mid = (min + mid)/2;
|
||||
mid = (min + max)/2;
|
||||
}
|
||||
else break; /* Some other error */
|
||||
}
|
||||
|
@ -6662,21 +6712,33 @@ else for (gmatched = 0;; gmatched++)
|
|||
(double)CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
/* Find the match and depth limits if requested. The match limit is not
|
||||
relevant for DFA matching and the depth limit is not relevant for JIT. */
|
||||
/* Find the heap, match and depth limits if requested. The match and heap
|
||||
limits are not relevant for DFA matching and the heap and depth limits are not
|
||||
relevant for JIT. */
|
||||
|
||||
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
||||
{
|
||||
if ((dat_datctl.control & CTL_DFA) == 0)
|
||||
{
|
||||
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||
(dat_datctl.options & PCRE2_NO_JIT) != 0)
|
||||
{
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT,
|
||||
"heap");
|
||||
}
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
|
||||
"match");
|
||||
}
|
||||
else capcount = 0;
|
||||
|
||||
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
|
||||
(dat_datctl.control & CTL_DFA) != 0)
|
||||
{
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
||||
"depth");
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise just run a single match, setting up a callout if required (the
|
||||
default). There is a copy of the pattern in pbuffer8 for use by callouts. */
|
||||
|
@ -7402,6 +7464,8 @@ printf(" \\C is supported\n");
|
|||
printf(" Internal link size = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
||||
printf(" Parentheses nest limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
|
||||
printf(" Default heap limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
|
||||
printf(" Default match limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
|
||||
|
|
|
@ -12,11 +12,13 @@ Starting code units: a z
|
|||
Last code unit = 'z'
|
||||
Subject length lower bound = 2
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 7
|
||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
aaaaaaaaaaaaaz\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 20481
|
||||
Minimum depth limit = 30
|
||||
No match
|
||||
|
@ -26,6 +28,7 @@ Capturing subpattern count = 1
|
|||
May match empty string
|
||||
Subject length lower bound = 0
|
||||
/* this is a C style comment */\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 64
|
||||
Minimum depth limit = 7
|
||||
0: /* this is a C style comment */
|
||||
|
@ -33,21 +36,25 @@ Minimum depth limit = 7
|
|||
|
||||
/^(?>a)++/
|
||||
aa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 5
|
||||
Minimum depth limit = 3
|
||||
0: aa
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 12
|
||||
Minimum depth limit = 3
|
||||
0: aaaaaaaaa
|
||||
|
||||
/(a)(?1)++/
|
||||
aa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 5
|
||||
0: aa
|
||||
1: a
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 21
|
||||
Minimum depth limit = 5
|
||||
0: aaaaaaaaa
|
||||
|
@ -55,30 +62,35 @@ Minimum depth limit = 5
|
|||
|
||||
/a(?:.)*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 24
|
||||
Minimum depth limit = 3
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 66
|
||||
Minimum depth limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN:ABC))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 66
|
||||
Minimum depth limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 7
|
||||
0: aabbccddee
|
||||
|
||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 12
|
||||
Minimum depth limit = 12
|
||||
0: aabbccddee
|
||||
|
@ -90,6 +102,7 @@ Minimum depth limit = 12
|
|||
|
||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 10
|
||||
Minimum depth limit = 10
|
||||
0: aabbccddee
|
||||
|
|
|
@ -15609,7 +15609,7 @@ Last code unit = 'c'
|
|||
Subject length lower bound = 4
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
Error -2: partial match
|
||||
Error -1: no match
|
||||
|
|
Loading…
Reference in New Issue