Add explicit heap limiting options to pcre2_match(), with associated features
for listing, configuring, etc.
This commit is contained in:
parent
f0126dc7ae
commit
14989bd454
|
@ -78,6 +78,7 @@
|
|||
# fix by David Gaussmann
|
||||
# 2016-10-07 PH added PCREGREP_MAX_BUFSIZE
|
||||
# 2017-03-11 PH turned HEAP_MATCH_RECURSE into a NO-OP for 10.30
|
||||
# 2017-04-08 PH added HEAP_LIMIT
|
||||
|
||||
PROJECT(PCRE2 C)
|
||||
|
||||
|
@ -143,6 +144,9 @@ SET(PCRE2_LINK_SIZE "2" CACHE STRING
|
|||
SET(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING
|
||||
"Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_HEAP_LIMIT "20000000" CACHE STRING
|
||||
"Default limit on heap memory (kilobytes). See HEAP_LIMIT in config.h.in for details.")
|
||||
|
||||
SET(PCRE2_MATCH_LIMIT "10000000" CACHE STRING
|
||||
"Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
|
||||
|
||||
|
@ -765,6 +769,7 @@ IF(PCRE2_SHOW_REPORT)
|
|||
MESSAGE(STATUS " Rebuild char tables ............. : ${PCRE2_REBUILD_CHARTABLES}")
|
||||
MESSAGE(STATUS " Internal link size .............. : ${PCRE2_LINK_SIZE}")
|
||||
MESSAGE(STATUS " Parentheses nest limit .......... : ${PCRE2_PARENS_NEST_LIMIT}")
|
||||
MESSAGE(STATUS " Heap limit ...................... : ${PCRE2_HEAP_LIMIT}")
|
||||
MESSAGE(STATUS " Match limit ..................... : ${PCRE2_MATCH_LIMIT}")
|
||||
MESSAGE(STATUS " Match depth limit ............... : ${PCRE2_MATCH_LIMIT_DEPTH}")
|
||||
MESSAGE(STATUS " Build shared libs ............... : ${BUILD_SHARED_LIBS}")
|
||||
|
|
|
@ -121,6 +121,11 @@ single-branch conditions with a false condition (e.g. DEFINE) at the start of a
|
|||
branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as
|
||||
anchored.
|
||||
|
||||
22. Added an explicit limit on the amount of heap used by pcre2_match(), set by
|
||||
pcre2_set_heap_limit() or (*LIMIT_HEAP=xxx). Upgraded pcre2test to show the
|
||||
heap limit along with other pattern information, and to find the minimum when
|
||||
the find_limits modifier is set.
|
||||
|
||||
|
||||
Version 10.23 14-February-2017
|
||||
------------------------------
|
||||
|
|
|
@ -69,6 +69,7 @@ dist_html_DATA = \
|
|||
doc/html/pcre2_set_character_tables.html \
|
||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||
doc/html/pcre2_set_depth_limit.html \
|
||||
doc/html/pcre2_set_heap_limit.html \
|
||||
doc/html/pcre2_set_match_limit.html \
|
||||
doc/html/pcre2_set_max_pattern_length.html \
|
||||
doc/html/pcre2_set_offset_limit.html \
|
||||
|
@ -152,6 +153,7 @@ dist_man_MANS = \
|
|||
doc/pcre2_set_character_tables.3 \
|
||||
doc/pcre2_set_compile_recursion_guard.3 \
|
||||
doc/pcre2_set_depth_limit.3 \
|
||||
doc/pcre2_set_heap_limit.3 \
|
||||
doc/pcre2_set_match_limit.3 \
|
||||
doc/pcre2_set_max_pattern_length.3 \
|
||||
doc/pcre2_set_offset_limit.3 \
|
||||
|
|
23
README
23
README
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||
it uses when matching a pattern with the Perl-compatible matching function.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
|
@ -235,14 +235,23 @@ library. They are also documented in the pcre2build man page.
|
|||
pcre2api man page (search for pcre2_set_match_limit).
|
||||
|
||||
. There is a separate counter that limits the depth of nested backtracking
|
||||
during a matching process, which in turn limits the amount of memory that is
|
||||
used. This also has a default of ten million, which is essentially
|
||||
during a matching process, which indirectly limits the amount of heap memory
|
||||
that is used. This also has a default of ten million, which is essentially
|
||||
"unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-depth=5000
|
||||
|
||||
There is more discussion in the pcre2api man page (search for
|
||||
pcre2_set_depth_limit).
|
||||
|
||||
. You can also set an explicit limit on the amount of heap memory used by
|
||||
the pcre2_match() interpreter:
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||
(which has its own memory control features) is used. There is more discussion
|
||||
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 17 March 2017
|
||||
Last updated: 11 April 2017
|
||||
|
|
2
RunTest
2
RunTest
|
@ -489,7 +489,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -64,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
fi
|
||||
done
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#cmakedefine NEVER_BACKSLASH_C 1
|
||||
|
||||
#define LINK_SIZE @PCRE2_LINK_SIZE@
|
||||
#define HEAP_LIMIT @PCRE2_HEAP_LIMIT@
|
||||
#define MATCH_LIMIT @PCRE2_MATCH_LIMIT@
|
||||
#define MATCH_LIMIT_DEPTH @PCRE2_MATCH_LIMIT_DEPTH@
|
||||
#define NEWLINE_DEFAULT @NEWLINE_DEFAULT@
|
||||
|
|
27
configure.ac
27
configure.ac
|
@ -263,6 +263,12 @@ AC_ARG_WITH(parens-nest-limit,
|
|||
[nested parentheses limit (default=250)]),
|
||||
, with_parens_nest_limit=250)
|
||||
|
||||
# Handle --with-heap-limit=N
|
||||
AC_ARG_WITH(heap-limit,
|
||||
AS_HELP_STRING([--with-heap-limit=N],
|
||||
[default limit on heap memory (kilobytes, default=20000000)]),
|
||||
, with_heap_limit=20000000)
|
||||
|
||||
# Handle --with-match-limit=N
|
||||
AC_ARG_WITH(match-limit,
|
||||
AS_HELP_STRING([--with-match-limit=N],
|
||||
|
@ -285,7 +291,7 @@ AC_ARG_WITH(match-limit-depth,
|
|||
|
||||
AC_ARG_WITH(match-limit-recursion,,
|
||||
, with_match_limit_recursion=UNSET)
|
||||
|
||||
|
||||
# Handle --enable-valgrind
|
||||
AC_ARG_ENABLE(valgrind,
|
||||
AS_HELP_STRING([--enable-valgrind],
|
||||
|
@ -680,12 +686,12 @@ AC_DEFINE_UNQUOTED([PARENS_NEST_LIMIT], [$with_parens_nest_limit], [
|
|||
stack that is used while compiling a pattern.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([MATCH_LIMIT], [$with_match_limit], [
|
||||
The value of MATCH_LIMIT determines the default number of times the internal
|
||||
match() function can record a backtrack position during a single matching
|
||||
attempt. There is a runtime interface for setting a different limit. The
|
||||
limit exists in order to catch runaway regular expressions that take for ever
|
||||
to determine that they do not match. The default is set very large so that it
|
||||
does not accidentally catch legitimate cases.])
|
||||
The value of MATCH_LIMIT determines the default number of times the
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different limit.
|
||||
The limit exists in order to catch runaway regular expressions that take for
|
||||
ever to determine that they do not match. The default is set very large so
|
||||
that it does not accidentally catch legitimate cases.])
|
||||
|
||||
# --with-match-limit-recursion is an obsolete synonym for --with-match-limit-depth
|
||||
|
||||
|
@ -694,7 +700,7 @@ cat <<EOF
|
|||
|
||||
WARNING: --with-match-limit-recursion is an obsolete option. Please use
|
||||
--with-match-limit-depth in future. If both are set, --with-match-limit-depth
|
||||
will be used.
|
||||
will be used. See also --with-heap-limit.
|
||||
|
||||
EOF
|
||||
if test "$with_match_limit_depth" = "MATCH_LIMIT"; then
|
||||
|
@ -711,6 +717,10 @@ AC_DEFINE_UNQUOTED([MATCH_LIMIT_DEPTH], [$with_match_limit_depth], [
|
|||
be less than the value of MATCH_LIMIT. The default is to use the same value
|
||||
as MATCH_LIMIT. There is a runtime method for setting a different limit.])
|
||||
|
||||
AC_DEFINE_UNQUOTED([HEAP_LIMIT], [$with_heap_limit], [
|
||||
This limits the amount of memory that pcre2_match() may use while matching
|
||||
a pattern. The value is in kilobytes.])
|
||||
|
||||
AC_DEFINE([MAX_NAME_SIZE], [32], [
|
||||
This limit is parameterized just in case anybody ever wants to
|
||||
change it. Care must be taken if it is increased, because it guards
|
||||
|
@ -971,6 +981,7 @@ $PACKAGE-$VERSION configuration summary:
|
|||
Rebuild char tables ................ : ${enable_rebuild_chartables}
|
||||
Internal link size ................. : ${with_link_size}
|
||||
Nested parentheses limit ........... : ${with_parens_nest_limit}
|
||||
Heap limit ......................... : ${with_heap_limit} kilobytes
|
||||
Match limit ........................ : ${with_match_limit}
|
||||
Match depth limit .................. : ${with_match_limit_depth}
|
||||
Build shared libs .................. : ${enable_shared}
|
||||
|
|
|
@ -223,10 +223,10 @@ library. They are also documented in the pcre2build man page.
|
|||
|
||||
--with-parens-nest-limit=500
|
||||
|
||||
. PCRE2 has a counter that can be set to limit the amount of resources it uses
|
||||
when matching a pattern. If the limit is exceeded during a match, the match
|
||||
fails. The default is ten million. You can change the default by setting, for
|
||||
example,
|
||||
. PCRE2 has a counter that can be set to limit the amount of computing resource
|
||||
it uses when matching a pattern with the Perl-compatible matching function.
|
||||
If the limit is exceeded during a match, the match fails. The default is ten
|
||||
million. You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit=500000
|
||||
|
||||
|
@ -235,14 +235,23 @@ library. They are also documented in the pcre2build man page.
|
|||
pcre2api man page (search for pcre2_set_match_limit).
|
||||
|
||||
. There is a separate counter that limits the depth of nested backtracking
|
||||
during a matching process, which in turn limits the amount of memory that is
|
||||
used. This also has a default of ten million, which is essentially
|
||||
during a matching process, which indirectly limits the amount of heap memory
|
||||
that is used. This also has a default of ten million, which is essentially
|
||||
"unlimited". You can change the default by setting, for example,
|
||||
|
||||
--with-match-limit-depth=5000
|
||||
|
||||
There is more discussion in the pcre2api man page (search for
|
||||
pcre2_set_depth_limit).
|
||||
|
||||
. You can also set an explicit limit on the amount of heap memory used by
|
||||
the pcre2_match() interpreter:
|
||||
|
||||
--with-heap-limit=500
|
||||
|
||||
The units are kilobytes. This limit does not apply when the JIT optimization
|
||||
(which has its own memory control features) is used. There is more discussion
|
||||
on the pcre2api man page (search for pcre2_set_heap_limit).
|
||||
|
||||
. In the 8-bit library, the default maximum compiled pattern size is around
|
||||
64K bytes. You can increase this by adding --with-link-size=3 to the
|
||||
|
@ -865,4 +874,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 17 March 2017
|
||||
Last updated: 11 April 2017
|
||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
|
|
|
@ -45,6 +45,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
|
||||
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
|
||||
|
|
|
@ -44,6 +44,7 @@ A match context is needed only if you want to:
|
|||
<pre>
|
||||
Set up a callout function
|
||||
Set a matching offset limit
|
||||
Change the heap memory limit
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
|
|
@ -51,6 +51,7 @@ request are as follows:
|
|||
PCRE2_INFO_FRAMESIZE Size of backtracking frame
|
||||
PCRE2_INFO_HASBACKSLASHC Return 1 if pattern contains \C
|
||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches exist in the pattern
|
||||
PCRE2_INFO_HEAPLIMIT Heap memory limit if set, otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||
|
|
|
@ -182,6 +182,10 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -793,6 +797,7 @@ A match context is required if you want to:
|
|||
<pre>
|
||||
Set up a callout function
|
||||
Set an offset limit for matching an unanchored pattern
|
||||
Change the limit on the amount of heap used when matching
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
@ -851,14 +856,47 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
|
|||
start within the first line of the subject. If this is set with an offset
|
||||
limit, a match must occur in the first line and also within the offset limit.
|
||||
In other words, whichever limit comes first is used.
|
||||
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <i>heap_limit</i> parameter specifies, in units of kilobytes, the maximum
|
||||
amount of heap memory that <b>pcre2_match()</b> may use to hold backtracking
|
||||
information when running an interpretive match. This limit does not apply to
|
||||
matching with the JIT optimization, which has its own memory control
|
||||
arrangements (see the
|
||||
<a href="pcre2jit.html"><b>pcre2jit</b></a>
|
||||
documentation for more details), nor does it apply to <b>pcre2_dfa_match()</b>.
|
||||
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||
returned. The default limit is set when PCRE2 is built; the default default is
|
||||
very large and is essentially "unlimited".
|
||||
</P>
|
||||
<P>
|
||||
A value for the heap limit may also be supplied by an item at the start of a
|
||||
pattern of the form
|
||||
<pre>
|
||||
(*LIMIT_HEAP=ddd)
|
||||
</pre>
|
||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||
limit is set, less than the default.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||
stack for recording backtracking points. The more nested backtracking points
|
||||
there are (that is, the deeper the search tree), the more memory is needed.
|
||||
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||
can be successfully processed.
|
||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
The <i>match_limit</i> parameter provides a means of preventing PCRE2 from using
|
||||
up too many resources when processing patterns that are not going to match, but
|
||||
which have a very large number of possibilities in their search trees. The
|
||||
classic example is a pattern that uses nested unlimited repeats.
|
||||
up too many computing resources when processing patterns that are not going to
|
||||
match, but which have a very large number of possibilities in their search
|
||||
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||
</P>
|
||||
<P>
|
||||
There is an internal counter in <b>pcre2_match()</b> that is incremented each
|
||||
|
@ -895,16 +933,20 @@ limit is set, less than the default.
|
|||
This parameter limits the depth of nested backtracking in <b>pcre2_match()</b>.
|
||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||
to remember the state of matching at that point. Thus, this parameter
|
||||
indirectly limits the amount of memory that is used in a match.
|
||||
indirectly limits the amount of memory that is used in a match. However,
|
||||
because the size of each memory "frame" depends on the number of capturing
|
||||
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||
was more useful in versions before 10.30, where function recursion was used for
|
||||
backtracking.
|
||||
</P>
|
||||
<P>
|
||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
||||
compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which uses
|
||||
it to limit the depth of internal recursive function calls that implement
|
||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
||||
limit on the amount of system stack that is used. A recursive pattern such as
|
||||
/(.)(?1)/, when matched to a very long string using <b>pcre2_dfa_match()</b>,
|
||||
can use a great deal of stack.
|
||||
The depth limit is not relevant, and is ignored, when matching is done using
|
||||
JIT compiled code. However, it is supported by <b>pcre2_dfa_match()</b>, which
|
||||
uses it to limit the depth of internal recursive function calls that implement
|
||||
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||
therefore, an indirect limit on the amount of system stack that is used. A
|
||||
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||
<b>pcre2_dfa_match()</b>, can use a great deal of stack.
|
||||
</P>
|
||||
<P>
|
||||
The default value for the depth limit can be set when PCRE2 is built; the
|
||||
|
@ -958,6 +1000,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
|||
nested backtracking in <b>pcre2_match()</b> or the depth of nested recursions
|
||||
and lookarounds in <b>pcre2_dfa_match()</b>. Further details are given with
|
||||
<b>pcre2_set_depth_limit()</b> above.
|
||||
<pre>
|
||||
PCRE2_CONFIG_HEAPLIMIT
|
||||
</pre>
|
||||
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||
for the amount of heap memory used by <b>pcre2_match()</b>. Further details are
|
||||
given with <b>pcre2_set_heap_limit()</b> above.
|
||||
<pre>
|
||||
PCRE2_CONFIG_JIT
|
||||
</pre>
|
||||
|
@ -1786,6 +1834,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
|||
otherwise 0. The third argument should point to an <b>uint32_t</b> variable. An
|
||||
explicit match is either a literal CR or LF character, or \r or \n or one of
|
||||
the equivalent hexadecimal or octal escape sequences.
|
||||
<pre>
|
||||
PCRE2_INFO_HEAPLIMIT
|
||||
</pre>
|
||||
If the pattern set a heap memory limit by including an item of the form
|
||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
||||
<pre>
|
||||
PCRE2_INFO_JCHANGED
|
||||
</pre>
|
||||
|
@ -2554,7 +2609,8 @@ The backtracking match limit was reached.
|
|||
</pre>
|
||||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails.
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit.
|
||||
<pre>
|
||||
PCRE2_ERROR_NULL
|
||||
</pre>
|
||||
|
@ -3271,7 +3327,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -265,17 +265,41 @@ to the <b>configure</b> command. This setting has no effect on the
|
|||
(though the counting is done differently).
|
||||
</P>
|
||||
<P>
|
||||
In some environments it is desirable to limit the depth of nested backtracking
|
||||
in order to restrict the maximum amount of heap memory that is used. A second
|
||||
limit controls this; it defaults to the value that is set for
|
||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
||||
The <b>pcre2_match()</b> function starts out using a 20K vector on the system
|
||||
stack to record backtracking points. The more nested backtracking points there
|
||||
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||
which is specified in kilobytes. The limit can be changed at run time, as
|
||||
described in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
documentation. The default limit (in effect unlimited) is 20 million. You can
|
||||
change this by a setting such as
|
||||
<pre>
|
||||
--with-heap-limit=500
|
||||
</pre>
|
||||
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||
interpretive matching in <b>pcre2_match()</b>. It does not apply when JIT (which has
|
||||
its own memory arrangements) is used, nor does it apply to
|
||||
<b>pcre2_dfa_match()</b>.
|
||||
</P>
|
||||
<P>
|
||||
You can also explicitly limit the depth of nested backtracking in the
|
||||
<b>pcre2_match()</b> interpreter. This limit defaults to the value that is set
|
||||
for --with-match-limit. You can set a lower default limit by adding, for
|
||||
example,
|
||||
<pre>
|
||||
--with-match-limit-depth=10000
|
||||
</pre>
|
||||
to the <b>configure</b> command. This value can also be overridden at run time.
|
||||
As well as applying to <b>pcre2_match()</b>, this limit also controls the depth
|
||||
of recursive function calls in <b>pcre2_dfa_match()</b>. These are used for
|
||||
lookaround assertions, atomic groups, and recursion within patterns.
|
||||
to the <b>configure</b> command. This value can be overridden at run time. This
|
||||
depth limit indirectly limits the amount of heap memory that is used, but
|
||||
because the size of each backtracking "frame" depends on the number of
|
||||
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||
versions before 10.30, where function recursion was used for backtracking.
|
||||
However, as well as applying to <b>pcre2_match()</b>, this limit also controls
|
||||
the depth of recursive function calls in <b>pcre2_dfa_match()</b>. These are
|
||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||
The limit does not apply to JIT matching.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
|
||||
<P>
|
||||
|
@ -530,7 +554,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC25" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 10 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -404,6 +404,10 @@ file name is followed by a colon; for context lines, a hyphen separator is used.
|
|||
If a line number is also being output, it follows the file name.
|
||||
</P>
|
||||
<P>
|
||||
<b>--heap-limit</b>=<i>number</i>
|
||||
See <b>--match-limit</b> below.
|
||||
</P>
|
||||
<P>
|
||||
<b>--help</b>
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit. Anything else on the command line is
|
||||
|
@ -505,7 +509,7 @@ used. There is no short form for this option.
|
|||
<b>--match-limit</b>=<i>number</i>
|
||||
Processing some regular expression patterns may take a very long time to search
|
||||
for all possible matching strings. Others may require a very large amount of
|
||||
memory. There are two options that set resource limits for matching.
|
||||
memory. There are three options that set resource limits for matching.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--match-limit</b> option provides a means of limiting computing resource
|
||||
|
@ -516,13 +520,24 @@ counter that is incremented each time around its main processing loop. If the
|
|||
value set by <b>--match-limit</b> is reached, an error occurs.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--heap-limit</b> option specifies, as a number of kilobytes, the amount
|
||||
of heap memory that may be used for matching. Heap memory is needed only if
|
||||
matching the pattern requires a significant number of nested backtracking
|
||||
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
<br>
|
||||
<br>
|
||||
The <b>--depth-limit</b> option limits the depth of nested backtracking points,
|
||||
which in turn limits the amount of memory that is used. This limit is of use
|
||||
only if it is set smaller than <b>--match-limit</b>.
|
||||
which indirectly limits the amount of memory that is used. The amount of memory
|
||||
needed for each backtracking point depends on the number of capturing
|
||||
parentheses in the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||
set smaller than <b>--match-limit</b>.
|
||||
<br>
|
||||
<br>
|
||||
There are no short forms for these options. The default settings are specified
|
||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||
when the PCRE2 library is compiled, with the default defaults being very large
|
||||
and so effectively unlimited.
|
||||
</P>
|
||||
<P>
|
||||
<b>--max-buffer-size</b>=<i>number</i>
|
||||
|
@ -764,11 +779,12 @@ Many of the short and long forms of <b>pcre2grep</b>'s options are the same
|
|||
as in the GNU <b>grep</b> program. Any long option of the form
|
||||
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
|
||||
(PCRE2 terminology). However, the <b>--depth-limit</b>, <b>--file-list</b>,
|
||||
<b>--file-offsets</b>, <b>--include-dir</b>, <b>--line-offsets</b>,
|
||||
<b>--locale</b>, <b>--match-limit</b>, <b>-M</b>, <b>--multiline</b>, <b>-N</b>,
|
||||
<b>--newline</b>, <b>--om-separator</b>, <b>--output</b>, <b>-u</b>, and
|
||||
<b>--utf-8</b> options are specific to <b>pcre2grep</b>, as is the use of the
|
||||
<b>--only-matching</b> option with a capturing parentheses number.
|
||||
<b>--file-offsets</b>, <b>--heap-limit</b>, <b>--include-dir</b>,
|
||||
<b>--line-offsets</b>, <b>--locale</b>, <b>--match-limit</b>, <b>-M</b>,
|
||||
<b>--multiline</b>, <b>-N</b>, <b>--newline</b>, <b>--om-separator</b>,
|
||||
<b>--output</b>, <b>-u</b>, and <b>--utf-8</b> options are specific to
|
||||
<b>pcre2grep</b>, as is the use of the <b>--only-matching</b> option with a
|
||||
capturing parentheses number.
|
||||
</P>
|
||||
<P>
|
||||
Although most of the common options work the same way, a few are different in
|
||||
|
@ -891,9 +907,9 @@ there are more than 20 such errors, <b>pcre2grep</b> gives up.
|
|||
</P>
|
||||
<P>
|
||||
The <b>--match-limit</b> option of <b>pcre2grep</b> can be used to set the
|
||||
overall resource limit; there is a second option called <b>--depth-limit</b>
|
||||
that sets a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
overall resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of <b>--heap-limit</b> and
|
||||
<b>--depth-limit</b> above.
|
||||
</P>
|
||||
<br><a name="SEC12" href="#TOC1">DIAGNOSTICS</a><br>
|
||||
<P>
|
||||
|
@ -918,7 +934,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -170,14 +170,15 @@ the application to apply the JIT optimization by calling
|
|||
<b>pcre2_jit_compile()</b> is ignored.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting match and backtracking depth limits
|
||||
Setting match resource limits
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>pcre2_match()</b> function contains a counter that is incremented every time it
|
||||
goes round its main loop. The caller of <b>pcre2_match()</b> can set a limit on
|
||||
this counter, which therefore limits the amount of computing resource used for
|
||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
||||
restricts the amount of heap memory that is used.
|
||||
a match. The maximum depth of nested backtracking can also be limited; this
|
||||
indirectly restricts the amount of heap memory that is used, but there is also
|
||||
an explicit memory limit that can be set.
|
||||
</P>
|
||||
<P>
|
||||
These facilities are provided to catch runaway matches that are provoked by
|
||||
|
@ -186,6 +187,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
|||
these limits is reached, <b>pcre2_match()</b> gives an error return. The limits
|
||||
can also be set by items at the start of the pattern of the form
|
||||
<pre>
|
||||
(*LIMIT_HEAP=d)
|
||||
(*LIMIT_MATCH=d)
|
||||
(*LIMIT_DEPTH=d)
|
||||
</pre>
|
||||
|
@ -200,11 +202,13 @@ Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
|||
still recognized for backwards compatibility.
|
||||
</P>
|
||||
<P>
|
||||
The match limit is used (but in a different way) when JIT is being used, but it
|
||||
is not relevant, and is ignored, when matching with <b>pcre2_dfa_match()</b>.
|
||||
However, the depth limit is relevant for DFA matching, which uses function
|
||||
recursion for recursions within the pattern. In this case, the depth limit
|
||||
controls the amount of system stack that is used.
|
||||
The heap limit applies only when the <b>pcre2_match()</b> interpreter is used
|
||||
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||
ignored, when matching with <b>pcre2_dfa_match()</b>. The depth limit is ignored
|
||||
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||
recursions within the pattern. In this case, the depth limit controls the
|
||||
amount of system stack that is used.
|
||||
<a name="newlines"></a></P>
|
||||
<br><b>
|
||||
Newline conventions
|
||||
|
@ -3434,7 +3438,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -83,11 +83,12 @@ From release 10.30, the interpretive (non-JIT) version of <b>pcre2_match()</b>
|
|||
uses very little system stack at run time. In earlier releases recursive
|
||||
function calls could use a great deal of stack, and this could cause problems,
|
||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
||||
frames is allocated on the system stack (enough for about 50 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
||||
to be time-efficient, as described below, may also reduce the memory
|
||||
requirements.
|
||||
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||
frames is allocated on the system stack (enough for about 100 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||
memory can be limited; if the limit is set to zero, only the initial stack
|
||||
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||
may also reduce the memory requirements.
|
||||
</P>
|
||||
<P>
|
||||
In contrast to <b>pcre2_match()</b>, <b>pcre2_dfa_match()</b> does use recursive
|
||||
|
@ -243,7 +244,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 08 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -235,6 +235,12 @@ Behave as if each pattern line has the <b>jit</b> modifier; after successful
|
|||
compilation, each pattern is passed to the just-in-time compiler, if available.
|
||||
</P>
|
||||
<P>
|
||||
<b>-jitverify</b>
|
||||
Behave as if each pattern line has the <b>jitverify</b> modifier; after
|
||||
successful compilation, each pattern is passed to the just-in-time compiler, if
|
||||
available, and the use of JIT is verified.
|
||||
</P>
|
||||
<P>
|
||||
<b>-pattern</b> <i>modifier-list</i>
|
||||
Behave as if each pattern line contains the given modifiers.
|
||||
</P>
|
||||
|
@ -1088,6 +1094,7 @@ pattern.
|
|||
get=<number or name> extract captured substring
|
||||
getall extract all captured substrings
|
||||
/g global global matching
|
||||
heap_limit=<n> set a limit on heap memory
|
||||
jitstack=<n> set size of JIT stack
|
||||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
|
@ -1330,11 +1337,11 @@ stack that is larger than the default 32K is necessary only for very
|
|||
complicated patterns.
|
||||
</P>
|
||||
<br><b>
|
||||
Setting match and depth limits
|
||||
Setting heap, match, and depth limits
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>match_limit</b> and <b>depth_limit</b> modifiers set the appropriate
|
||||
limits in the match context. These values are ignored when the
|
||||
The <b>heap_limit</b>, <b>match_limit</b>, and <b>depth_limit</b> modifiers set
|
||||
the appropriate limits in the match context. These values are ignored when the
|
||||
<b>find_limits</b> modifier is specified.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -1343,8 +1350,8 @@ Finding minimum limits
|
|||
<P>
|
||||
If the <b>find_limits</b> modifier is present on a subject line, <b>pcre2test</b>
|
||||
calls the relevant matching function several times, setting different values in
|
||||
the match context via <b>pcre2_set_match_limit()</b> or
|
||||
<b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
||||
the match context via <b>pcre2_set_heap_limit()</b>, <b>pcre2_set_match_limit()</b>,
|
||||
or <b>pcre2_set_depth_limit()</b> until it finds the minimum values for each
|
||||
parameter that allows the match to complete without error.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -1360,9 +1367,9 @@ increasing length of subject string.
|
|||
</P>
|
||||
<P>
|
||||
For non-DFA matching, the minimum <i>depth_limit</i> number is a measure of how
|
||||
much memory for recording backtracking points is needed to complete the match
|
||||
attempt. In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||
searched). In the case of DFA matching, <i>depth_limit</i> controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
recursion, lookaround assertions, and atomic groups.
|
||||
</P>
|
||||
<br><b>
|
||||
|
@ -1800,7 +1807,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -213,6 +213,9 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_depth_limit.html">pcre2_set_depth_limit</a></td>
|
||||
<td> Set the match backtracking depth limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_heap_limit.html">pcre2_set_heap_limit</a></td>
|
||||
<td> Set the match backtracking heap limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
|
|
2160
doc/pcre2.txt
2160
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_CONFIG 3 "24 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_CONFIG 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -31,6 +31,7 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_CONFIG_BSR Indicates what \eR matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||
.\" JOIN
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_MATCH 3 "04 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_MATCH 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -32,6 +32,7 @@ A match context is needed only if you want to:
|
|||
.sp
|
||||
Set up a callout function
|
||||
Set a matching offset limit
|
||||
Change the heap memory limit
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_PATTERN_INFO 3 "25 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2_PATTERN_INFO 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -43,6 +43,9 @@ request are as follows:
|
|||
.\" JOIN
|
||||
PCRE2_INFO_HASCRORLF Return 1 if explicit CR or LF matches
|
||||
exist in the pattern
|
||||
.\" JOIN
|
||||
PCRE2_INFO_HEAPLIMIT Heap memory limit if set,
|
||||
otherwise PCRE2_ERROR_UNSET
|
||||
PCRE2_INFO_JCHANGED Return 1 if (?J) or (?-J) was used
|
||||
PCRE2_INFO_JITSIZE Size of JIT compiled code, or 0
|
||||
PCRE2_INFO_LASTCODETYPE Type of must-be-present information
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
.TH PCRE2_SET_HEAP_LIMIT 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function sets the backtracking heap limit field in a match context. The
|
||||
result is always zero.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "04 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2API 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -123,6 +123,9 @@ document for an overview of all the PCRE2 documentation.
|
|||
.B int pcre2_set_offset_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
|
@ -753,6 +756,7 @@ A match context is required if you want to:
|
|||
.sp
|
||||
Set up a callout function
|
||||
Set an offset limit for matching an unanchored pattern
|
||||
Change the limit on the amount of heap used when matching
|
||||
Change the backtracking match limit
|
||||
Change the backtracking depth limit
|
||||
Set custom memory management specifically for the match
|
||||
|
@ -816,14 +820,49 @@ limit, a match must occur in the first line and also within the offset limit.
|
|||
In other words, whichever limit comes first is used.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_heap_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The \fIheap_limit\fP parameter specifies, in units of kilobytes, the maximum
|
||||
amount of heap memory that \fBpcre2_match()\fP may use to hold backtracking
|
||||
information when running an interpretive match. This limit does not apply to
|
||||
matching with the JIT optimization, which has its own memory control
|
||||
arrangements (see the
|
||||
.\" HREF
|
||||
\fBpcre2jit\fP
|
||||
.\"
|
||||
documentation for more details), nor does it apply to \fBpcre2_dfa_match()\fP.
|
||||
If the limit is reached, the negative error code PCRE2_ERROR_HEAPLIMIT is
|
||||
returned. The default limit is set when PCRE2 is built; the default default is
|
||||
very large and is essentially "unlimited".
|
||||
.P
|
||||
A value for the heap limit may also be supplied by an item at the start of a
|
||||
pattern of the form
|
||||
.sp
|
||||
(*LIMIT_HEAP=ddd)
|
||||
.sp
|
||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||
less than the limit set by the caller of \fBpcre2_match()\fP or, if no such
|
||||
limit is set, less than the default.
|
||||
.P
|
||||
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||
stack for recording backtracking points. The more nested backtracking points
|
||||
there are (that is, the deeper the search tree), the more memory is needed.
|
||||
Heap memory is used only if the initial vector is too small. If the heap limit
|
||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||
can be successfully processed.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_match_limit(pcre2_match_context *\fImcontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
The \fImatch_limit\fP parameter provides a means of preventing PCRE2 from using
|
||||
up too many resources when processing patterns that are not going to match, but
|
||||
which have a very large number of possibilities in their search trees. The
|
||||
classic example is a pattern that uses nested unlimited repeats.
|
||||
up too many computing resources when processing patterns that are not going to
|
||||
match, but which have a very large number of possibilities in their search
|
||||
trees. The classic example is a pattern that uses nested unlimited repeats.
|
||||
.P
|
||||
There is an internal counter in \fBpcre2_match()\fP that is incremented each
|
||||
time round its main matching loop. If this value reaches the match limit,
|
||||
|
@ -859,15 +898,19 @@ limit is set, less than the default.
|
|||
This parameter limits the depth of nested backtracking in \fBpcre2_match()\fP.
|
||||
Each time a nested backtracking point is passed, a new memory "frame" is used
|
||||
to remember the state of matching at that point. Thus, this parameter
|
||||
indirectly limits the amount of memory that is used in a match.
|
||||
indirectly limits the amount of memory that is used in a match. However,
|
||||
because the size of each memory "frame" depends on the number of capturing
|
||||
parentheses, the actual memory limit varies from pattern to pattern. This limit
|
||||
was more useful in versions before 10.30, where function recursion was used for
|
||||
backtracking.
|
||||
.P
|
||||
This limit is not relevant, and is ignored, when matching is done using JIT
|
||||
compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which uses
|
||||
it to limit the depth of internal recursive function calls that implement
|
||||
lookaround assertions and pattern recursions. This is, therefore, an indirect
|
||||
limit on the amount of system stack that is used. A recursive pattern such as
|
||||
/(.)(?1)/, when matched to a very long string using \fBpcre2_dfa_match()\fP,
|
||||
can use a great deal of stack.
|
||||
The depth limit is not relevant, and is ignored, when matching is done using
|
||||
JIT compiled code. However, it is supported by \fBpcre2_dfa_match()\fP, which
|
||||
uses it to limit the depth of internal recursive function calls that implement
|
||||
atomic groups, lookaround assertions, and pattern recursions. This is,
|
||||
therefore, an indirect limit on the amount of system stack that is used. A
|
||||
recursive pattern such as /(.)(?1)/, when matched to a very long string using
|
||||
\fBpcre2_dfa_match()\fP, can use a great deal of stack.
|
||||
.P
|
||||
The default value for the depth limit can be set when PCRE2 is built; the
|
||||
default default is the same value as the default for the match limit. If the
|
||||
|
@ -921,6 +964,12 @@ The output is a uint32_t integer that gives the default limit for the depth of
|
|||
nested backtracking in \fBpcre2_match()\fP or the depth of nested recursions
|
||||
and lookarounds in \fBpcre2_dfa_match()\fP. Further details are given with
|
||||
\fBpcre2_set_depth_limit()\fP above.
|
||||
.sp
|
||||
PCRE2_CONFIG_HEAPLIMIT
|
||||
.sp
|
||||
The output is a uint32_t integer that gives, in kilobytes, the default limit
|
||||
for the amount of heap memory used by \fBpcre2_match()\fP. Further details are
|
||||
given with \fBpcre2_set_heap_limit()\fP above.
|
||||
.sp
|
||||
PCRE2_CONFIG_JIT
|
||||
.sp
|
||||
|
@ -1784,6 +1833,13 @@ Return 1 if the pattern contains any explicit matches for CR or LF characters,
|
|||
otherwise 0. The third argument should point to an \fBuint32_t\fP variable. An
|
||||
explicit match is either a literal CR or LF character, or \er or \en or one of
|
||||
the equivalent hexadecimal or octal escape sequences.
|
||||
.sp
|
||||
PCRE2_INFO_HEAPLIMIT
|
||||
.sp
|
||||
If the pattern set a heap memory limit by including an item of the form
|
||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||
call to \fBpcre2_pattern_info()\fP returns the error PCRE2_ERROR_UNSET.
|
||||
.sp
|
||||
PCRE2_INFO_JCHANGED
|
||||
.sp
|
||||
|
@ -2603,7 +2659,8 @@ The backtracking match limit was reached.
|
|||
.sp
|
||||
If a pattern contains many nested backtracking points, heap memory is used to
|
||||
remember them. This error is given when the memory allocation function (default
|
||||
or custom) fails.
|
||||
or custom) fails. Note that a different error, PCRE2_ERROR_HEAPLIMIT, is given
|
||||
if the amount of memory needed exceeds the heap limit.
|
||||
.sp
|
||||
PCRE2_ERROR_NULL
|
||||
.sp
|
||||
|
@ -3322,6 +3379,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 04 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2BUILD 3 "31 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2BUILD 3 "10 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.
|
||||
|
@ -260,17 +260,42 @@ to the \fBconfigure\fP command. This setting has no effect on the
|
|||
\fBpcre2_dfa_match()\fP matching function, but it does also limit JIT matching
|
||||
(though the counting is done differently).
|
||||
.P
|
||||
In some environments it is desirable to limit the depth of nested backtracking
|
||||
in order to restrict the maximum amount of heap memory that is used. A second
|
||||
limit controls this; it defaults to the value that is set for
|
||||
--with-match-limit. You can set a lower default limit by adding, for example,
|
||||
The \fBpcre2_match()\fP function starts out using a 20K vector on the system
|
||||
stack to record backtracking points. The more nested backtracking points there
|
||||
are (that is, the deeper the search tree), the more memory is needed. If the
|
||||
initial vector is not large enough, heap memory is used, up to a certain limit,
|
||||
which is specified in kilobytes. The limit can be changed at run time, as
|
||||
described in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
documentation. The default limit (in effect unlimited) is 20 million. You can
|
||||
change this by a setting such as
|
||||
.sp
|
||||
--with-heap-limit=500
|
||||
.sp
|
||||
which limits the amount of heap to 500 kilobytes. This limit applies only to
|
||||
interpretive matching in \fBpcre2_match()\fP. It does not apply when JIT (which has
|
||||
its own memory arrangements) is used, nor does it apply to
|
||||
\fBpcre2_dfa_match()\fP.
|
||||
.P
|
||||
You can also explicitly limit the depth of nested backtracking in the
|
||||
\fBpcre2_match()\fP interpreter. This limit defaults to the value that is set
|
||||
for --with-match-limit. You can set a lower default limit by adding, for
|
||||
example,
|
||||
.sp
|
||||
--with-match-limit-depth=10000
|
||||
.sp
|
||||
to the \fBconfigure\fP command. This value can also be overridden at run time.
|
||||
As well as applying to \fBpcre2_match()\fP, this limit also controls the depth
|
||||
of recursive function calls in \fBpcre2_dfa_match()\fP. These are used for
|
||||
lookaround assertions, atomic groups, and recursion within patterns.
|
||||
to the \fBconfigure\fP command. This value can be overridden at run time. This
|
||||
depth limit indirectly limits the amount of heap memory that is used, but
|
||||
because the size of each backtracking "frame" depends on the number of
|
||||
capturing parentheses in a pattern, the amount of heap that is used before the
|
||||
limit is reached varies from pattern to pattern. This limit was more useful in
|
||||
versions before 10.30, where function recursion was used for backtracking.
|
||||
However, as well as applying to \fBpcre2_match()\fP, this limit also controls
|
||||
the depth of recursive function calls in \fBpcre2_dfa_match()\fP. These are
|
||||
used for lookaround assertions, atomic groups, and recursion within patterns.
|
||||
The limit does not apply to JIT matching.
|
||||
.
|
||||
.
|
||||
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
|
||||
|
@ -547,6 +572,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 10 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2GREP 1 "06 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2GREP 1 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2grep - a grep with Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -347,6 +347,9 @@ file names are shown when multiple files are searched. For matching lines, the
|
|||
file name is followed by a colon; for context lines, a hyphen separator is used.
|
||||
If a line number is also being output, it follows the file name.
|
||||
.TP
|
||||
\fB--heap-limit\fP=\fInumber\fP
|
||||
See \fB--match-limit\fP below.
|
||||
.TP
|
||||
\fB--help\fP
|
||||
Output a help message, giving brief details of the command options and file
|
||||
type support, and then exit. Anything else on the command line is
|
||||
|
@ -436,7 +439,7 @@ used. There is no short form for this option.
|
|||
\fB--match-limit\fP=\fInumber\fP
|
||||
Processing some regular expression patterns may take a very long time to search
|
||||
for all possible matching strings. Others may require a very large amount of
|
||||
memory. There are two options that set resource limits for matching.
|
||||
memory. There are three options that set resource limits for matching.
|
||||
.sp
|
||||
The \fB--match-limit\fP option provides a means of limiting computing resource
|
||||
usage when processing patterns that are not going to match, but which have a
|
||||
|
@ -445,12 +448,22 @@ is a pattern that uses nested unlimited repeats. Internally, PCRE2 has a
|
|||
counter that is incremented each time around its main processing loop. If the
|
||||
value set by \fB--match-limit\fP is reached, an error occurs.
|
||||
.sp
|
||||
The \fB--heap-limit\fP option specifies, as a number of kilobytes, the amount
|
||||
of heap memory that may be used for matching. Heap memory is needed only if
|
||||
matching the pattern requires a significant number of nested backtracking
|
||||
points to be remembered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
.sp
|
||||
The \fB--depth-limit\fP option limits the depth of nested backtracking points,
|
||||
which in turn limits the amount of memory that is used. This limit is of use
|
||||
only if it is set smaller than \fB--match-limit\fP.
|
||||
which indirectly limits the amount of memory that is used. The amount of memory
|
||||
needed for each backtracking point depends on the number of capturing
|
||||
parentheses in the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of use only if it is
|
||||
set smaller than \fB--match-limit\fP.
|
||||
.sp
|
||||
There are no short forms for these options. The default settings are specified
|
||||
when the PCRE2 library is compiled, with the default default being 10 million.
|
||||
when the PCRE2 library is compiled, with the default defaults being very large
|
||||
and so effectively unlimited.
|
||||
.TP
|
||||
\fB--max-buffer-size=\fInumber\fP
|
||||
This limits the expansion of the processing buffer, whose initial size can be
|
||||
|
@ -670,11 +683,12 @@ Many of the short and long forms of \fBpcre2grep\fP's options are the same
|
|||
as in the GNU \fBgrep\fP program. Any long option of the form
|
||||
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
|
||||
(PCRE2 terminology). However, the \fB--depth-limit\fP, \fB--file-list\fP,
|
||||
\fB--file-offsets\fP, \fB--include-dir\fP, \fB--line-offsets\fP,
|
||||
\fB--locale\fP, \fB--match-limit\fP, \fB-M\fP, \fB--multiline\fP, \fB-N\fP,
|
||||
\fB--newline\fP, \fB--om-separator\fP, \fB--output\fP, \fB-u\fP, and
|
||||
\fB--utf-8\fP options are specific to \fBpcre2grep\fP, as is the use of the
|
||||
\fB--only-matching\fP option with a capturing parentheses number.
|
||||
\fB--file-offsets\fP, \fB--heap-limit\fP, \fB--include-dir\fP,
|
||||
\fB--line-offsets\fP, \fB--locale\fP, \fB--match-limit\fP, \fB-M\fP,
|
||||
\fB--multiline\fP, \fB-N\fP, \fB--newline\fP, \fB--om-separator\fP,
|
||||
\fB--output\fP, \fB-u\fP, and \fB--utf-8\fP options are specific to
|
||||
\fBpcre2grep\fP, as is the use of the \fB--only-matching\fP option with a
|
||||
capturing parentheses number.
|
||||
.P
|
||||
Although most of the common options work the same way, a few are different in
|
||||
\fBpcre2grep\fP. For example, the \fB--include\fP option's argument is a glob
|
||||
|
@ -799,9 +813,9 @@ message and the line that caused the problem to the standard error stream. If
|
|||
there are more than 20 such errors, \fBpcre2grep\fP gives up.
|
||||
.P
|
||||
The \fB--match-limit\fP option of \fBpcre2grep\fP can be used to set the
|
||||
overall resource limit; there is a second option called \fB--depth-limit\fP
|
||||
that sets a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
overall resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of \fB--heap-limit\fP and
|
||||
\fB--depth-limit\fP above.
|
||||
.
|
||||
.
|
||||
.SH DIAGNOSTICS
|
||||
|
@ -834,6 +848,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -383,6 +383,9 @@ OPTIONS
|
|||
colon; for context lines, a hyphen separator is used. If a
|
||||
line number is also being output, it follows the file name.
|
||||
|
||||
--heap-limit=number
|
||||
See --match-limit below.
|
||||
|
||||
--help Output a help message, giving brief details of the command
|
||||
options and file type support, and then exit. Anything else
|
||||
on the command line is ignored.
|
||||
|
@ -482,7 +485,7 @@ OPTIONS
|
|||
--match-limit=number
|
||||
Processing some regular expression patterns may take a very
|
||||
long time to search for all possible matching strings. Others
|
||||
may require a very large amount of memory. There are two
|
||||
may require a very large amount of memory. There are three
|
||||
options that set resource limits for matching.
|
||||
|
||||
The --match-limit option provides a means of limiting comput-
|
||||
|
@ -494,237 +497,248 @@ OPTIONS
|
|||
processing loop. If the value set by --match-limit is
|
||||
reached, an error occurs.
|
||||
|
||||
The --depth-limit option limits the depth of nested back-
|
||||
tracking points, which in turn limits the amount of memory
|
||||
that is used. This limit is of use only if it is set smaller
|
||||
than --match-limit.
|
||||
The --heap-limit option specifies, as a number of kilobytes,
|
||||
the amount of heap memory that may be used for matching. Heap
|
||||
memory is needed only if matching the pattern requires a sig-
|
||||
nificant number of nested backtracking points to be remem-
|
||||
bered. This parameter can be set to zero to forbid the use of
|
||||
heap memory altogether.
|
||||
|
||||
The --depth-limit option limits the depth of nested back-
|
||||
tracking points, which indirectly limits the amount of memory
|
||||
that is used. The amount of memory needed for each backtrack-
|
||||
ing point depends on the number of capturing parentheses in
|
||||
the pattern, so the amount of memory that is used before this
|
||||
limit acts varies from pattern to pattern. This limit is of
|
||||
use only if it is set smaller than --match-limit.
|
||||
|
||||
There are no short forms for these options. The default set-
|
||||
tings are specified when the PCRE2 library is compiled, with
|
||||
the default default being 10 million.
|
||||
the default defaults being very large and so effectively
|
||||
unlimited.
|
||||
|
||||
--max-buffer-size=number
|
||||
This limits the expansion of the processing buffer, whose
|
||||
initial size can be set by --buffer-size. The maximum buffer
|
||||
size is silently forced to be no smaller than the starting
|
||||
This limits the expansion of the processing buffer, whose
|
||||
initial size can be set by --buffer-size. The maximum buffer
|
||||
size is silently forced to be no smaller than the starting
|
||||
buffer size.
|
||||
|
||||
-M, --multiline
|
||||
Allow patterns to match more than one line. When this option
|
||||
Allow patterns to match more than one line. When this option
|
||||
is set, the PCRE2 library is called in "multiline" mode. This
|
||||
allows a matched string to extend past the end of a line and
|
||||
continue on one or more subsequent lines. Patterns used with
|
||||
allows a matched string to extend past the end of a line and
|
||||
continue on one or more subsequent lines. Patterns used with
|
||||
-M may usefully contain literal newline characters and inter-
|
||||
nal occurrences of ^ and $ characters. The output for a suc-
|
||||
cessful match may consist of more than one line. The first
|
||||
line is the line in which the match started, and the last
|
||||
line is the line in which the match ended. If the matched
|
||||
string ends with a newline sequence, the output ends at the
|
||||
end of that line. If -v is set, none of the lines in a
|
||||
multi-line match are output. Once a match has been handled,
|
||||
scanning restarts at the beginning of the line after the one
|
||||
nal occurrences of ^ and $ characters. The output for a suc-
|
||||
cessful match may consist of more than one line. The first
|
||||
line is the line in which the match started, and the last
|
||||
line is the line in which the match ended. If the matched
|
||||
string ends with a newline sequence, the output ends at the
|
||||
end of that line. If -v is set, none of the lines in a
|
||||
multi-line match are output. Once a match has been handled,
|
||||
scanning restarts at the beginning of the line after the one
|
||||
in which the match ended.
|
||||
|
||||
The newline sequence that separates multiple lines must be
|
||||
matched as part of the pattern. For example, to find the
|
||||
phrase "regular expression" in a file where "regular" might
|
||||
be at the end of a line and "expression" at the start of the
|
||||
The newline sequence that separates multiple lines must be
|
||||
matched as part of the pattern. For example, to find the
|
||||
phrase "regular expression" in a file where "regular" might
|
||||
be at the end of a line and "expression" at the start of the
|
||||
next line, you could use this command:
|
||||
|
||||
pcre2grep -M 'regular\s+expression' <file>
|
||||
|
||||
The \s escape sequence matches any white space character,
|
||||
including newlines, and is followed by + so as to match
|
||||
trailing white space on the first line as well as possibly
|
||||
The \s escape sequence matches any white space character,
|
||||
including newlines, and is followed by + so as to match
|
||||
trailing white space on the first line as well as possibly
|
||||
handling a two-character newline sequence.
|
||||
|
||||
There is a limit to the number of lines that can be matched,
|
||||
imposed by the way that pcre2grep buffers the input file as
|
||||
it scans it. With a sufficiently large processing buffer,
|
||||
There is a limit to the number of lines that can be matched,
|
||||
imposed by the way that pcre2grep buffers the input file as
|
||||
it scans it. With a sufficiently large processing buffer,
|
||||
this should not be a problem, but the -M option does not work
|
||||
when input is read line by line (see --line-buffered).
|
||||
|
||||
-N newline-type, --newline=newline-type
|
||||
The PCRE2 library supports five different conventions for
|
||||
indicating the ends of lines. They are the single-character
|
||||
sequences CR (carriage return) and LF (linefeed), the two-
|
||||
character sequence CRLF, an "anycrlf" convention, which rec-
|
||||
ognizes any of the preceding three types, and an "any" con-
|
||||
The PCRE2 library supports five different conventions for
|
||||
indicating the ends of lines. They are the single-character
|
||||
sequences CR (carriage return) and LF (linefeed), the two-
|
||||
character sequence CRLF, an "anycrlf" convention, which rec-
|
||||
ognizes any of the preceding three types, and an "any" con-
|
||||
vention, in which any Unicode line ending sequence is assumed
|
||||
to end a line. The Unicode sequences are the three just men-
|
||||
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||||
U+000C), NEL (next line, U+0085), LS (line separator,
|
||||
to end a line. The Unicode sequences are the three just men-
|
||||
tioned, plus VT (vertical tab, U+000B), FF (form feed,
|
||||
U+000C), NEL (next line, U+0085), LS (line separator,
|
||||
U+2028), and PS (paragraph separator, U+2029).
|
||||
|
||||
When the PCRE2 library is built, a default line-ending
|
||||
sequence is specified. This is normally the standard
|
||||
When the PCRE2 library is built, a default line-ending
|
||||
sequence is specified. This is normally the standard
|
||||
sequence for the operating system. Unless otherwise specified
|
||||
by this option, pcre2grep uses the library's default. The
|
||||
by this option, pcre2grep uses the library's default. The
|
||||
possible values for this option are CR, LF, CRLF, ANYCRLF, or
|
||||
ANY. This makes it possible to use pcre2grep to scan files
|
||||
ANY. This makes it possible to use pcre2grep to scan files
|
||||
that have come from other environments without having to mod-
|
||||
ify their line endings. If the data that is being scanned
|
||||
does not agree with the convention set by this option,
|
||||
pcre2grep may behave in strange ways. Note that this option
|
||||
does not apply to files specified by the -f, --exclude-from,
|
||||
or --include-from options, which are expected to use the
|
||||
ify their line endings. If the data that is being scanned
|
||||
does not agree with the convention set by this option,
|
||||
pcre2grep may behave in strange ways. Note that this option
|
||||
does not apply to files specified by the -f, --exclude-from,
|
||||
or --include-from options, which are expected to use the
|
||||
operating system's standard newline sequence.
|
||||
|
||||
-n, --line-number
|
||||
Precede each output line by its line number in the file, fol-
|
||||
lowed by a colon for matching lines or a hyphen for context
|
||||
lowed by a colon for matching lines or a hyphen for context
|
||||
lines. If the file name is also being output, it precedes the
|
||||
line number. When the -M option causes a pattern to match
|
||||
more than one line, only the first is preceded by its line
|
||||
line number. When the -M option causes a pattern to match
|
||||
more than one line, only the first is preceded by its line
|
||||
number. This option is forced if --line-offsets is used.
|
||||
|
||||
--no-jit If the PCRE2 library is built with support for just-in-time
|
||||
--no-jit If the PCRE2 library is built with support for just-in-time
|
||||
compiling (which speeds up matching), pcre2grep automatically
|
||||
makes use of this, unless it was explicitly disabled at build
|
||||
time. This option can be used to disable the use of JIT at
|
||||
run time. It is provided for testing and working round prob-
|
||||
time. This option can be used to disable the use of JIT at
|
||||
run time. It is provided for testing and working round prob-
|
||||
lems. It should never be needed in normal use.
|
||||
|
||||
-O text, --output=text
|
||||
When there is a match, instead of outputting the whole line
|
||||
that matched, output just the given text. This option is
|
||||
mutually exclusive with --only-matching, --file-offsets, and
|
||||
When there is a match, instead of outputting the whole line
|
||||
that matched, output just the given text. This option is
|
||||
mutually exclusive with --only-matching, --file-offsets, and
|
||||
--line-offsets. Escape sequences starting with a dollar char-
|
||||
acter may be used to insert the contents of the matched part
|
||||
acter may be used to insert the contents of the matched part
|
||||
of the line and/or captured substrings into the text.
|
||||
|
||||
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||
string of the given decimal number; zero substitutes the
|
||||
$<digits> or ${<digits>} is replaced by the captured sub-
|
||||
string of the given decimal number; zero substitutes the
|
||||
whole match. If the number is greater than the number of cap-
|
||||
turing substrings, or if the capture is unset, the replace-
|
||||
turing substrings, or if the capture is unset, the replace-
|
||||
ment is empty.
|
||||
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||
$a is replaced by bell; $b by backspace; $e by escape; $f by
|
||||
form feed; $n by newline; $r by carriage return; $t by tab;
|
||||
$v by vertical tab.
|
||||
|
||||
$o<digits> is replaced by the character represented by the
|
||||
$o<digits> is replaced by the character represented by the
|
||||
given octal number; up to three digits are processed.
|
||||
|
||||
$x<digits> is replaced by the character represented by the
|
||||
$x<digits> is replaced by the character represented by the
|
||||
given hexadecimal number; up to two digits are processed.
|
||||
|
||||
Any other character is substituted by itself. In particular,
|
||||
Any other character is substituted by itself. In particular,
|
||||
$$ is replaced by a single dollar.
|
||||
|
||||
-o, --only-matching
|
||||
Show only the part of the line that matched a pattern instead
|
||||
of the whole line. In this mode, no context is shown. That
|
||||
is, the -A, -B, and -C options are ignored. If there is more
|
||||
than one match in a line, each of them is shown separately,
|
||||
on a separate line of output. If -o is combined with -v
|
||||
(invert the sense of the match to find non-matching lines),
|
||||
no output is generated, but the return code is set appropri-
|
||||
ately. If the matched portion of the line is empty, nothing
|
||||
is output unless the file name or line number are being
|
||||
printed, in which case they are shown on an otherwise empty
|
||||
of the whole line. In this mode, no context is shown. That
|
||||
is, the -A, -B, and -C options are ignored. If there is more
|
||||
than one match in a line, each of them is shown separately,
|
||||
on a separate line of output. If -o is combined with -v
|
||||
(invert the sense of the match to find non-matching lines),
|
||||
no output is generated, but the return code is set appropri-
|
||||
ately. If the matched portion of the line is empty, nothing
|
||||
is output unless the file name or line number are being
|
||||
printed, in which case they are shown on an otherwise empty
|
||||
line. This option is mutually exclusive with --output,
|
||||
--file-offsets and --line-offsets.
|
||||
|
||||
-onumber, --only-matching=number
|
||||
Show only the part of the line that matched the capturing
|
||||
Show only the part of the line that matched the capturing
|
||||
parentheses of the given number. Up to 32 capturing parenthe-
|
||||
ses are supported, and -o0 is equivalent to -o without a num-
|
||||
ber. Because these options can be given without an argument
|
||||
(see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2.
|
||||
ber. Because these options can be given without an argument
|
||||
(see above), if an argument is present, it must be given in
|
||||
the same shell item, for example, -o3 or --only-matching=2.
|
||||
The comments given for the non-argument case above also apply
|
||||
to this option. If the specified capturing parentheses do not
|
||||
exist in the pattern, or were not set in the match, nothing
|
||||
is output unless the file name or line number are being out-
|
||||
exist in the pattern, or were not set in the match, nothing
|
||||
is output unless the file name or line number are being out-
|
||||
put.
|
||||
|
||||
If this option is given multiple times, multiple substrings
|
||||
are output for each match, in the order the options are
|
||||
given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||
the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator
|
||||
If this option is given multiple times, multiple substrings
|
||||
are output for each match, in the order the options are
|
||||
given, and all on one line. For example, -o3 -o1 -o3 causes
|
||||
the substrings matched by capturing parentheses 3 and 1 and
|
||||
then 3 again to be output. By default, there is no separator
|
||||
(but see the next option).
|
||||
|
||||
--om-separator=text
|
||||
Specify a separating string for multiple occurrences of -o.
|
||||
The default is an empty string. Separating strings are never
|
||||
Specify a separating string for multiple occurrences of -o.
|
||||
The default is an empty string. Separating strings are never
|
||||
coloured.
|
||||
|
||||
-q, --quiet
|
||||
Work quietly, that is, display nothing except error messages.
|
||||
The exit status indicates whether or not any matches were
|
||||
The exit status indicates whether or not any matches were
|
||||
found.
|
||||
|
||||
-r, --recursive
|
||||
If any given path is a directory, recursively scan the files
|
||||
it contains, taking note of any --include and --exclude set-
|
||||
tings. By default, a directory is read as a normal file; in
|
||||
some operating systems this gives an immediate end-of-file.
|
||||
This option is a shorthand for setting the -d option to
|
||||
If any given path is a directory, recursively scan the files
|
||||
it contains, taking note of any --include and --exclude set-
|
||||
tings. By default, a directory is read as a normal file; in
|
||||
some operating systems this gives an immediate end-of-file.
|
||||
This option is a shorthand for setting the -d option to
|
||||
"recurse".
|
||||
|
||||
--recursion-limit=number
|
||||
See --match-limit above.
|
||||
|
||||
-s, --no-messages
|
||||
Suppress error messages about non-existent or unreadable
|
||||
files. Such files are quietly skipped. However, the return
|
||||
Suppress error messages about non-existent or unreadable
|
||||
files. Such files are quietly skipped. However, the return
|
||||
code is still 2, even if matches were found in other files.
|
||||
|
||||
-t, --total-count
|
||||
This option is useful when scanning more than one file. If
|
||||
used on its own, -t suppresses all output except for a grand
|
||||
total number of matching lines (or non-matching lines if -v
|
||||
is used) in all the files. If -t is used with -c, a grand
|
||||
total is output except when the previous output is just one
|
||||
line. In other words, it is not output when just one file's
|
||||
count is listed. If file names are being output, the grand
|
||||
total is preceded by "TOTAL:". Otherwise, it appears as just
|
||||
another number. The -t option is ignored when used with -L
|
||||
(list files without matches), because the grand total would
|
||||
This option is useful when scanning more than one file. If
|
||||
used on its own, -t suppresses all output except for a grand
|
||||
total number of matching lines (or non-matching lines if -v
|
||||
is used) in all the files. If -t is used with -c, a grand
|
||||
total is output except when the previous output is just one
|
||||
line. In other words, it is not output when just one file's
|
||||
count is listed. If file names are being output, the grand
|
||||
total is preceded by "TOTAL:". Otherwise, it appears as just
|
||||
another number. The -t option is ignored when used with -L
|
||||
(list files without matches), because the grand total would
|
||||
always be zero.
|
||||
|
||||
-u, --utf-8
|
||||
Operate in UTF-8 mode. This option is available only if PCRE2
|
||||
has been compiled with UTF-8 support. All patterns (including
|
||||
those for any --exclude and --include options) and all sub-
|
||||
ject lines that are scanned must be valid strings of UTF-8
|
||||
those for any --exclude and --include options) and all sub-
|
||||
ject lines that are scanned must be valid strings of UTF-8
|
||||
characters.
|
||||
|
||||
-V, --version
|
||||
Write the version numbers of pcre2grep and the PCRE2 library
|
||||
to the standard output and then exit. Anything else on the
|
||||
Write the version numbers of pcre2grep and the PCRE2 library
|
||||
to the standard output and then exit. Anything else on the
|
||||
command line is ignored.
|
||||
|
||||
-v, --invert-match
|
||||
Invert the sense of the match, so that lines which do not
|
||||
Invert the sense of the match, so that lines which do not
|
||||
match any of the patterns are the ones that are found.
|
||||
|
||||
-w, --word-regex, --word-regexp
|
||||
Force the patterns to match only whole words. This is equiva-
|
||||
lent to having \b at the start and end of the pattern. This
|
||||
option applies only to the patterns that are matched against
|
||||
the contents of files; it does not apply to patterns speci-
|
||||
lent to having \b at the start and end of the pattern. This
|
||||
option applies only to the patterns that are matched against
|
||||
the contents of files; it does not apply to patterns speci-
|
||||
fied by any of the --include or --exclude options.
|
||||
|
||||
-x, --line-regex, --line-regexp
|
||||
Force the patterns to be anchored (each must start matching
|
||||
at the beginning of a line) and in addition, require them to
|
||||
match entire lines. In multiline mode the match may be more
|
||||
Force the patterns to be anchored (each must start matching
|
||||
at the beginning of a line) and in addition, require them to
|
||||
match entire lines. In multiline mode the match may be more
|
||||
than one line. This is equivalent to having \A and \Z charac-
|
||||
ters at the start and end of each alternative top-level
|
||||
ters at the start and end of each alternative top-level
|
||||
branch in every pattern. This option applies only to the pat-
|
||||
terns that are matched against the contents of files; it does
|
||||
not apply to patterns specified by any of the --include or
|
||||
not apply to patterns specified by any of the --include or
|
||||
--exclude options.
|
||||
|
||||
|
||||
ENVIRONMENT VARIABLES
|
||||
|
||||
The environment variables LC_ALL and LC_CTYPE are examined, in that
|
||||
order, for a locale. The first one that is set is used. This can be
|
||||
overridden by the --locale option. If no locale is set, the PCRE2
|
||||
The environment variables LC_ALL and LC_CTYPE are examined, in that
|
||||
order, for a locale. The first one that is set is used. This can be
|
||||
overridden by the --locale option. If no locale is set, the PCRE2
|
||||
library's default (usually the "C" locale) is used.
|
||||
|
||||
|
||||
|
@ -732,99 +746,99 @@ NEWLINES
|
|||
|
||||
The -N (--newline) option allows pcre2grep to scan files with different
|
||||
newline conventions from the default. Any parts of the input files that
|
||||
are written to the standard output are copied identically, with what-
|
||||
ever newline sequences they have in the input. However, the setting of
|
||||
this option does not affect the interpretation of files specified by
|
||||
are written to the standard output are copied identically, with what-
|
||||
ever newline sequences they have in the input. However, the setting of
|
||||
this option does not affect the interpretation of files specified by
|
||||
the -f, --exclude-from, or --include-from options, which are assumed to
|
||||
use the operating system's standard newline sequence, nor does it
|
||||
affect the way in which pcre2grep writes informational messages to the
|
||||
use the operating system's standard newline sequence, nor does it
|
||||
affect the way in which pcre2grep writes informational messages to the
|
||||
standard error and output streams. For these it uses the string "\n" to
|
||||
indicate newlines, relying on the C I/O library to convert this to an
|
||||
indicate newlines, relying on the C I/O library to convert this to an
|
||||
appropriate sequence.
|
||||
|
||||
|
||||
OPTIONS COMPATIBILITY
|
||||
|
||||
Many of the short and long forms of pcre2grep's options are the same as
|
||||
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
|
||||
in the GNU grep program. Any long option of the form --xxx-regexp (GNU
|
||||
terminology) is also available as --xxx-regex (PCRE2 terminology). How-
|
||||
ever, the --depth-limit, --file-list, --file-offsets, --include-dir,
|
||||
--line-offsets, --locale, --match-limit, -M, --multiline, -N, --new-
|
||||
line, --om-separator, --output, -u, and --utf-8 options are specific to
|
||||
pcre2grep, as is the use of the --only-matching option with a capturing
|
||||
parentheses number.
|
||||
ever, the --depth-limit, --file-list, --file-offsets, --heap-limit,
|
||||
--include-dir, --line-offsets, --locale, --match-limit, -M, --multi-
|
||||
line, -N, --newline, --om-separator, --output, -u, and --utf-8 options
|
||||
are specific to pcre2grep, as is the use of the --only-matching option
|
||||
with a capturing parentheses number.
|
||||
|
||||
Although most of the common options work the same way, a few are dif-
|
||||
ferent in pcre2grep. For example, the --include option's argument is a
|
||||
glob for GNU grep, but a regular expression for pcre2grep. If both the
|
||||
-c and -l options are given, GNU grep lists only file names, without
|
||||
Although most of the common options work the same way, a few are dif-
|
||||
ferent in pcre2grep. For example, the --include option's argument is a
|
||||
glob for GNU grep, but a regular expression for pcre2grep. If both the
|
||||
-c and -l options are given, GNU grep lists only file names, without
|
||||
counts, but pcre2grep gives the counts as well.
|
||||
|
||||
|
||||
OPTIONS WITH DATA
|
||||
|
||||
There are four different ways in which an option with data can be spec-
|
||||
ified. If a short form option is used, the data may follow immedi-
|
||||
ified. If a short form option is used, the data may follow immedi-
|
||||
ately, or (with one exception) in the next command line item. For exam-
|
||||
ple:
|
||||
|
||||
-f/some/file
|
||||
-f /some/file
|
||||
|
||||
The exception is the -o option, which may appear with or without data.
|
||||
Because of this, if data is present, it must follow immediately in the
|
||||
The exception is the -o option, which may appear with or without data.
|
||||
Because of this, if data is present, it must follow immediately in the
|
||||
same item, for example -o3.
|
||||
|
||||
If a long form option is used, the data may appear in the same command
|
||||
line item, separated by an equals character, or (with two exceptions)
|
||||
If a long form option is used, the data may appear in the same command
|
||||
line item, separated by an equals character, or (with two exceptions)
|
||||
it may appear in the next command line item. For example:
|
||||
|
||||
--file=/some/file
|
||||
--file /some/file
|
||||
|
||||
Note, however, that if you want to supply a file name beginning with ~
|
||||
as data in a shell command, and have the shell expand ~ to a home
|
||||
Note, however, that if you want to supply a file name beginning with ~
|
||||
as data in a shell command, and have the shell expand ~ to a home
|
||||
directory, you must separate the file name from the option, because the
|
||||
shell does not treat ~ specially unless it is at the start of an item.
|
||||
|
||||
The exceptions to the above are the --colour (or --color) and --only-
|
||||
matching options, for which the data is optional. If one of these
|
||||
options does have data, it must be given in the first form, using an
|
||||
The exceptions to the above are the --colour (or --color) and --only-
|
||||
matching options, for which the data is optional. If one of these
|
||||
options does have data, it must be given in the first form, using an
|
||||
equals character. Otherwise pcre2grep will assume that it has no data.
|
||||
|
||||
|
||||
USING PCRE2'S CALLOUT FACILITY
|
||||
|
||||
pcre2grep has, by default, support for calling external programs or
|
||||
scripts or echoing specific strings during matching by making use of
|
||||
PCRE2's callout facility. However, this support can be disabled when
|
||||
pcre2grep is built. You can find out whether your binary has support
|
||||
for callouts by running it with the --help option. If the support is
|
||||
pcre2grep has, by default, support for calling external programs or
|
||||
scripts or echoing specific strings during matching by making use of
|
||||
PCRE2's callout facility. However, this support can be disabled when
|
||||
pcre2grep is built. You can find out whether your binary has support
|
||||
for callouts by running it with the --help option. If the support is
|
||||
not enabled, all callouts in patterns are ignored by pcre2grep.
|
||||
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||
mentation for details). Numbered callouts are ignored by pcre2grep;
|
||||
A callout in a PCRE2 pattern is of the form (?C<arg>) where the argu-
|
||||
ment is either a number or a quoted string (see the pcre2callout docu-
|
||||
mentation for details). Numbered callouts are ignored by pcre2grep;
|
||||
only callouts with string arguments are useful.
|
||||
|
||||
Calling external programs or scripts
|
||||
|
||||
If the callout string does not start with a pipe (vertical bar) charac-
|
||||
ter, it is parsed into a list of substrings separated by pipe charac-
|
||||
ters. The first substring must be an executable name, with the follow-
|
||||
ter, it is parsed into a list of substrings separated by pipe charac-
|
||||
ters. The first substring must be an executable name, with the follow-
|
||||
ing substrings specifying arguments:
|
||||
|
||||
executable_name|arg1|arg2|...
|
||||
|
||||
Any substring (including the executable name) may contain escape
|
||||
sequences started by a dollar character: $<digits> or ${<digits>} is
|
||||
replaced by the captured substring of the given decimal number, which
|
||||
must be greater than zero. If the number is greater than the number of
|
||||
capturing substrings, or if the capture is unset, the replacement is
|
||||
Any substring (including the executable name) may contain escape
|
||||
sequences started by a dollar character: $<digits> or ${<digits>} is
|
||||
replaced by the captured substring of the given decimal number, which
|
||||
must be greater than zero. If the number is greater than the number of
|
||||
capturing substrings, or if the capture is unset, the replacement is
|
||||
empty.
|
||||
|
||||
Any other character is substituted by itself. In particular, $$ is
|
||||
replaced by a single dollar and $| is replaced by a pipe character.
|
||||
Any other character is substituted by itself. In particular, $$ is
|
||||
replaced by a single dollar and $| is replaced by a pipe character.
|
||||
Here is an example:
|
||||
|
||||
echo -e "abcde\n12345" | pcre2grep \
|
||||
|
@ -840,49 +854,49 @@ USING PCRE2'S CALLOUT FACILITY
|
|||
|
||||
The parameters for the execv() system call that is used to run the pro-
|
||||
gram or script are zero-terminated strings. This means that binary zero
|
||||
characters in the callout argument will cause premature termination of
|
||||
their substrings, and therefore should not be present. Any syntax
|
||||
errors in the string (for example, a dollar not followed by another
|
||||
character) cause the callout to be ignored. If running the program
|
||||
characters in the callout argument will cause premature termination of
|
||||
their substrings, and therefore should not be present. Any syntax
|
||||
errors in the string (for example, a dollar not followed by another
|
||||
character) cause the callout to be ignored. If running the program
|
||||
fails for any reason (including the non-existence of the executable), a
|
||||
local matching failure occurs and the matcher backtracks in the normal
|
||||
local matching failure occurs and the matcher backtracks in the normal
|
||||
way.
|
||||
|
||||
Echoing a specific string
|
||||
|
||||
If the callout string starts with a pipe (vertical bar) character, the
|
||||
If the callout string starts with a pipe (vertical bar) character, the
|
||||
rest of the string is written to the output, having been passed through
|
||||
the same escape processing as text from the --output option. This pro-
|
||||
the same escape processing as text from the --output option. This pro-
|
||||
vides a simple echoing facility that avoids calling an external program
|
||||
or script. No terminator is added to the string, so if you want a new-
|
||||
line, you must include it explicitly. Matching continues normally
|
||||
after the string is output. If you want to see only the callout output
|
||||
but not any output from an actual match, you should end the relevant
|
||||
or script. No terminator is added to the string, so if you want a new-
|
||||
line, you must include it explicitly. Matching continues normally
|
||||
after the string is output. If you want to see only the callout output
|
||||
but not any output from an actual match, you should end the relevant
|
||||
pattern with (*FAIL).
|
||||
|
||||
|
||||
MATCHING ERRORS
|
||||
|
||||
It is possible to supply a regular expression that takes a very long
|
||||
time to fail to match certain lines. Such patterns normally involve
|
||||
nested indefinite repeats, for example: (a+)*\d when matched against a
|
||||
line of a's with no final digit. The PCRE2 matching function has a
|
||||
resource limit that causes it to abort in these circumstances. If this
|
||||
happens, pcre2grep outputs an error message and the line that caused
|
||||
the problem to the standard error stream. If there are more than 20
|
||||
It is possible to supply a regular expression that takes a very long
|
||||
time to fail to match certain lines. Such patterns normally involve
|
||||
nested indefinite repeats, for example: (a+)*\d when matched against a
|
||||
line of a's with no final digit. The PCRE2 matching function has a
|
||||
resource limit that causes it to abort in these circumstances. If this
|
||||
happens, pcre2grep outputs an error message and the line that caused
|
||||
the problem to the standard error stream. If there are more than 20
|
||||
such errors, pcre2grep gives up.
|
||||
|
||||
The --match-limit option of pcre2grep can be used to set the overall
|
||||
resource limit; there is a second option called --depth-limit that sets
|
||||
a limit on the amount of memory that is used (see the discussion of
|
||||
these options above).
|
||||
The --match-limit option of pcre2grep can be used to set the overall
|
||||
resource limit. There are also other limits that affect the amount of
|
||||
memory used during matching; see the discussion of --heap-limit and
|
||||
--depth-limit above.
|
||||
|
||||
|
||||
DIAGNOSTICS
|
||||
|
||||
Exit status is 0 if any matches were found, 1 if no matches were found,
|
||||
and 2 for syntax errors, overlong lines, non-existent or inaccessible
|
||||
files (even if matches were found in other files) or too many matching
|
||||
and 2 for syntax errors, overlong lines, non-existent or inaccessible
|
||||
files (even if matches were found in other files) or too many matching
|
||||
errors. Using the -s option to suppress error messages about inaccessi-
|
||||
ble files does not affect the return code.
|
||||
|
||||
|
@ -901,5 +915,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 06 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PATTERN 3 "03 April 2017" "PCRE2 10.30"
|
||||
.TH PCRE2PATTERN 3 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 REGULAR EXPRESSION DETAILS"
|
||||
|
@ -138,14 +138,15 @@ the application to apply the JIT optimization by calling
|
|||
\fBpcre2_jit_compile()\fP is ignored.
|
||||
.
|
||||
.
|
||||
.SS "Setting match and backtracking depth limits"
|
||||
.SS "Setting match resource limits"
|
||||
.rs
|
||||
.sp
|
||||
The \fBpcre2_match()\fP function contains a counter that is incremented every time it
|
||||
goes round its main loop. The caller of \fBpcre2_match()\fP can set a limit on
|
||||
this counter, which therefore limits the amount of computing resource used for
|
||||
a match. The maximum depth of nested backtracking can also be limited, and this
|
||||
restricts the amount of heap memory that is used.
|
||||
a match. The maximum depth of nested backtracking can also be limited; this
|
||||
indirectly restricts the amount of heap memory that is used, but there is also
|
||||
an explicit memory limit that can be set.
|
||||
.P
|
||||
These facilities are provided to catch runaway matches that are provoked by
|
||||
patterns with huge matching trees (a typical example is a pattern with nested
|
||||
|
@ -153,6 +154,7 @@ unlimited repeats applied to a long string that does not match). When one of
|
|||
these limits is reached, \fBpcre2_match()\fP gives an error return. The limits
|
||||
can also be set by items at the start of the pattern of the form
|
||||
.sp
|
||||
(*LIMIT_HEAP=d)
|
||||
(*LIMIT_MATCH=d)
|
||||
(*LIMIT_DEPTH=d)
|
||||
.sp
|
||||
|
@ -165,11 +167,13 @@ setting of one of these limits, the lower value is used.
|
|||
Prior to release 10.30, LIMIT_DEPTH was called LIMIT_RECURSION. This name is
|
||||
still recognized for backwards compatibility.
|
||||
.P
|
||||
The match limit is used (but in a different way) when JIT is being used, but it
|
||||
is not relevant, and is ignored, when matching with \fBpcre2_dfa_match()\fP.
|
||||
However, the depth limit is relevant for DFA matching, which uses function
|
||||
recursion for recursions within the pattern. In this case, the depth limit
|
||||
controls the amount of system stack that is used.
|
||||
The heap limit applies only when the \fBpcre2_match()\fP interpreter is used
|
||||
for matching. It does not apply to JIT or DFA matching. The match limit is used
|
||||
(but in a different way) when JIT is being used, but it is not relevant, and is
|
||||
ignored, when matching with \fBpcre2_dfa_match()\fP. The depth limit is ignored
|
||||
by JIT but is relevant for DFA matching, which uses function recursion for
|
||||
recursions within the pattern. In this case, the depth limit controls the
|
||||
amount of system stack that is used.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="newlines"></a>
|
||||
|
@ -3465,6 +3469,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2PERFORM 3 "31 March 2017" "PCRE2 10.30"
|
||||
.TH PCRE2PERFORM 3 "08 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "PCRE2 PERFORMANCE"
|
||||
|
@ -69,11 +69,12 @@ From release 10.30, the interpretive (non-JIT) version of \fBpcre2_match()\fP
|
|||
uses very little system stack at run time. In earlier releases recursive
|
||||
function calls could use a great deal of stack, and this could cause problems,
|
||||
but this usage has been eliminated. Backtracking positions are now explicitly
|
||||
remembered in memory frames controlled by the code. An initial 10K vector of
|
||||
frames is allocated on the system stack (enough for about 50 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. Rewriting patterns
|
||||
to be time-efficient, as described below, may also reduce the memory
|
||||
requirements.
|
||||
remembered in memory frames controlled by the code. An initial 20K vector of
|
||||
frames is allocated on the system stack (enough for about 100 frames for small
|
||||
patterns), but if this is insufficient, heap memory is used. The amount of heap
|
||||
memory can be limited; if the limit is set to zero, only the initial stack
|
||||
vector is used. Rewriting patterns to be time-efficient, as described below,
|
||||
may also reduce the memory requirements.
|
||||
.P
|
||||
In contrast to \fBpcre2_match()\fP, \fBpcre2_dfa_match()\fP does use recursive
|
||||
function calls, but only for processing atomic groups, lookaround assertions,
|
||||
|
@ -231,6 +232,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 31 March 2017
|
||||
Last updated: 08 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "08 April 2017" "PCRE 10.30"
|
||||
.TH PCRE2TEST 1 "11 April 2017" "PCRE2 10.30"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -1063,6 +1063,7 @@ pattern.
|
|||
get=<number or name> extract captured substring
|
||||
getall extract all captured substrings
|
||||
/g global global matching
|
||||
heap_limit=<n> set a limit on heap memory (kilobytes)
|
||||
jitstack=<n> set size of JIT stack
|
||||
mark show mark values
|
||||
match_limit=<n> set a match limit
|
||||
|
@ -1293,11 +1294,11 @@ stack that is larger than the default 32K is necessary only for very
|
|||
complicated patterns.
|
||||
.
|
||||
.
|
||||
.SS "Setting match and depth limits"
|
||||
.SS "Setting heap, match, and depth limits"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmatch_limit\fP and \fBdepth_limit\fP modifiers set the appropriate
|
||||
limits in the match context. These values are ignored when the
|
||||
The \fBheap_limit\fP, \fBmatch_limit\fP, and \fBdepth_limit\fP modifiers set
|
||||
the appropriate limits in the match context. These values are ignored when the
|
||||
\fBfind_limits\fP modifier is specified.
|
||||
.
|
||||
.
|
||||
|
@ -1306,8 +1307,8 @@ limits in the match context. These values are ignored when the
|
|||
.sp
|
||||
If the \fBfind_limits\fP modifier is present on a subject line, \fBpcre2test\fP
|
||||
calls the relevant matching function several times, setting different values in
|
||||
the match context via \fBpcre2_set_match_limit()\fP or
|
||||
\fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||
the match context via \fBpcre2_set_heap_limit()\fP, \fBpcre2_set_match_limit()\fP,
|
||||
or \fBpcre2_set_depth_limit()\fP until it finds the minimum values for each
|
||||
parameter that allows the match to complete without error.
|
||||
.P
|
||||
If JIT is being used, only the match limit is relevant. If DFA matching is
|
||||
|
@ -1320,9 +1321,9 @@ numbers of matching possibilities, it can become large very quickly with
|
|||
increasing length of subject string.
|
||||
.P
|
||||
For non-DFA matching, the minimum \fIdepth_limit\fP number is a measure of how
|
||||
much memory for recording backtracking points is needed to complete the match
|
||||
attempt. In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
much nested backtracking happens (that is, how deeply the pattern's tree is
|
||||
searched). In the case of DFA matching, \fIdepth_limit\fP controls the depth of
|
||||
recursive calls of the internal function that is used for handling pattern
|
||||
recursion, lookaround assertions, and atomic groups.
|
||||
.
|
||||
.
|
||||
|
@ -1782,6 +1783,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 08 April 2017
|
||||
Last updated: 11 April 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -132,6 +132,10 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* Define to 1 if you have the <zlib.h> header file. */
|
||||
#undef HAVE_ZLIB_H
|
||||
|
||||
/* This limits the amount of heap memory that pcre2_match() may use while
|
||||
matching a pattern. The value is in kilobytes. */
|
||||
#undef HEAP_LIMIT
|
||||
|
||||
/* The value of LINK_SIZE determines the number of bytes used to store links
|
||||
as offsets within the compiled regex. The default is 2, which allows for
|
||||
compiled patterns up to 64K long. This covers the vast majority of cases.
|
||||
|
@ -143,7 +147,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#undef LT_OBJDIR
|
||||
|
||||
/* The value of MATCH_LIMIT determines the default number of times the
|
||||
internal match() function can record a backtrack position during a single
|
||||
pcre2_match() function can record a backtrack position during a single
|
||||
matching attempt. There is a runtime interface for setting a different
|
||||
limit. The limit exists in order to catch runaway regular expressions that
|
||||
take for ever to determine that they do not match. The default is set very
|
||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
|
|
|
@ -268,6 +268,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_ERROR_BADSUBSPATTERN (-60)
|
||||
#define PCRE2_ERROR_TOOMANYREPLACE (-61)
|
||||
#define PCRE2_ERROR_BADSERIALIZEDDATA (-62)
|
||||
#define PCRE2_ERROR_HEAPLIMIT (-63)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -297,6 +298,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_INFO_SIZE 22
|
||||
#define PCRE2_INFO_HASBACKSLASHC 23
|
||||
#define PCRE2_INFO_FRAMESIZE 24
|
||||
#define PCRE2_INFO_HEAPLIMIT 25
|
||||
|
||||
/* Request types for pcre2_config(). */
|
||||
|
||||
|
@ -313,6 +315,7 @@ numbers must not be changed. */
|
|||
#define PCRE2_CONFIG_UNICODE 9
|
||||
#define PCRE2_CONFIG_UNICODE_VERSION 10
|
||||
#define PCRE2_CONFIG_VERSION 11
|
||||
#define PCRE2_CONFIG_HEAPLIMIT 12
|
||||
|
||||
/* Types for code units in patterns and subject strings. */
|
||||
|
||||
|
@ -452,6 +455,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
|||
int (*)(pcre2_callout_block *, void *), void *); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_heap_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
pcre2_set_match_limit(pcre2_match_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||
|
@ -676,6 +681,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||
#define pcre2_set_heap_limit PCRE2_SUFFIX(pcre2_set_heap_limit_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
|
|
|
@ -727,6 +727,7 @@ enum { PSO_OPT, /* Value is an option bit */
|
|||
PSO_FLG, /* Value is a flag bit */
|
||||
PSO_NL, /* Value is a newline type */
|
||||
PSO_BSR, /* Value is a \R type */
|
||||
PSO_LIMH, /* Read integer value for heap limit */
|
||||
PSO_LIMM, /* Read integer value for match limit */
|
||||
PSO_LIMD }; /* Read integer value for depth limit */
|
||||
|
||||
|
@ -749,6 +750,7 @@ static pso pso_list[] = {
|
|||
{ (uint8_t *)STRING_NO_DOTSTAR_ANCHOR_RIGHTPAR, 18, PSO_OPT, PCRE2_NO_DOTSTAR_ANCHOR },
|
||||
{ (uint8_t *)STRING_NO_JIT_RIGHTPAR, 7, PSO_FLG, PCRE2_NOJIT },
|
||||
{ (uint8_t *)STRING_NO_START_OPT_RIGHTPAR, 13, PSO_OPT, PCRE2_NO_START_OPTIMIZE },
|
||||
{ (uint8_t *)STRING_LIMIT_HEAP_EQ, 11, PSO_LIMH, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_MATCH_EQ, 12, PSO_LIMM, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_DEPTH_EQ, 12, PSO_LIMD, 0 },
|
||||
{ (uint8_t *)STRING_LIMIT_RECURSION_EQ, 16, PSO_LIMD, 0 },
|
||||
|
@ -8853,6 +8855,7 @@ uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
|||
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||
|
||||
uint32_t skipatstart; /* When checking (*UTF) etc */
|
||||
uint32_t limit_heap = UINT32_MAX;
|
||||
uint32_t limit_match = UINT32_MAX; /* Unset match limits */
|
||||
uint32_t limit_depth = UINT32_MAX;
|
||||
|
||||
|
@ -9026,6 +9029,7 @@ while (patlen - skipatstart >= 2 &&
|
|||
|
||||
case PSO_LIMM:
|
||||
case PSO_LIMD:
|
||||
case PSO_LIMH:
|
||||
c = 0;
|
||||
pp = skipatstart;
|
||||
if (!IS_DIGIT(ptr[pp]))
|
||||
|
@ -9045,7 +9049,8 @@ while (patlen - skipatstart >= 2 &&
|
|||
ptr += pp;
|
||||
goto HAD_EARLY_ERROR;
|
||||
}
|
||||
if (p->type == PSO_LIMM) limit_match = c;
|
||||
if (p->type == PSO_LIMH) limit_heap = c;
|
||||
else if (p->type == PSO_LIMM) limit_match = c;
|
||||
else limit_depth = c;
|
||||
skipatstart += pp - skipatstart;
|
||||
break;
|
||||
|
@ -9288,6 +9293,7 @@ re->magic_number = MAGIC_NUMBER;
|
|||
re->compile_options = options;
|
||||
re->overall_options = cb.external_options;
|
||||
re->flags = PCRE2_CODE_UNIT_WIDTH/8 | cb.external_flags | setflags;
|
||||
re->limit_heap = limit_heap;
|
||||
re->limit_match = limit_match;
|
||||
re->limit_depth = limit_depth;
|
||||
re->first_codeunit = 0;
|
||||
|
|
|
@ -84,6 +84,7 @@ if (where == NULL) /* Requests a length */
|
|||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
case PCRE2_CONFIG_BSR:
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
case PCRE2_CONFIG_JIT:
|
||||
case PCRE2_CONFIG_LINKSIZE:
|
||||
case PCRE2_CONFIG_MATCHLIMIT:
|
||||
|
@ -116,6 +117,10 @@ switch (what)
|
|||
#endif
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_HEAPLIMIT:
|
||||
*((uint32_t *)where) = HEAP_LIMIT;
|
||||
break;
|
||||
|
||||
case PCRE2_CONFIG_JIT:
|
||||
#ifdef SUPPORT_JIT
|
||||
*((uint32_t *)where) = 1;
|
||||
|
|
|
@ -168,6 +168,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
|||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
HEAP_LIMIT,
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_DEPTH };
|
||||
|
||||
|
@ -346,6 +347,13 @@ mcontext->callout_data = callout_data;
|
|||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_heap_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
mcontext->heap_limit = limit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_match_limit(pcre2_match_context *mcontext, uint32_t limit)
|
||||
{
|
||||
|
|
|
@ -256,6 +256,7 @@ static const unsigned char match_error_texts[] =
|
|||
"match with end before start is not supported\0"
|
||||
"too many replacements (more than INT_MAX)\0"
|
||||
"bad serialized data\0"
|
||||
"heap limit exceeded\0"
|
||||
;
|
||||
|
||||
|
||||
|
|
|
@ -240,6 +240,16 @@ not rely on this. */
|
|||
|
||||
#define COMPILE_ERROR_BASE 100
|
||||
|
||||
/* The initial frames vector for remembering backtracking points in
|
||||
pcre2_match() is allocated on the system stack, of this size (bytes). The size
|
||||
must be a multiple of sizeof(PCRE2_SPTR) in all environments, so making it a
|
||||
multiple of 8 is best. Typical frame sizes are a few hundred bytes (it depends
|
||||
on the number of capturing parentheses) so 20K handles quite a few frames. A
|
||||
larger vector on the heap is obtained for patterns that need more frames. The
|
||||
maximum size of this can be limited. */
|
||||
|
||||
#define START_FRAMES_SIZE 20480
|
||||
|
||||
/* Define the default BSR convention. */
|
||||
|
||||
#ifdef BSR_ANYCRLF
|
||||
|
@ -922,6 +932,7 @@ a positive value. */
|
|||
#define STRING_NO_START_OPT_RIGHTPAR "NO_START_OPT)"
|
||||
#define STRING_NOTEMPTY_RIGHTPAR "NOTEMPTY)"
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR "NOTEMPTY_ATSTART)"
|
||||
#define STRING_LIMIT_HEAP_EQ "LIMIT_HEAP="
|
||||
#define STRING_LIMIT_MATCH_EQ "LIMIT_MATCH="
|
||||
#define STRING_LIMIT_DEPTH_EQ "LIMIT_DEPTH="
|
||||
#define STRING_LIMIT_RECURSION_EQ "LIMIT_RECURSION="
|
||||
|
@ -1196,6 +1207,7 @@ only. */
|
|||
#define STRING_NO_START_OPT_RIGHTPAR STR_N STR_O STR_UNDERSCORE STR_S STR_T STR_A STR_R STR_T STR_UNDERSCORE STR_O STR_P STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_RIGHT_PARENTHESIS
|
||||
#define STRING_NOTEMPTY_ATSTART_RIGHTPAR STR_N STR_O STR_T STR_E STR_M STR_P STR_T STR_Y STR_UNDERSCORE STR_A STR_T STR_S STR_T STR_A STR_R STR_T STR_RIGHT_PARENTHESIS
|
||||
#define STRING_LIMIT_HEAP_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_H STR_E STR_A STR_P STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_MATCH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_M STR_A STR_T STR_C STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_DEPTH_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_D STR_E STR_P STR_T STR_H STR_EQUALS_SIGN
|
||||
#define STRING_LIMIT_RECURSION_EQ STR_L STR_I STR_M STR_I STR_T STR_UNDERSCORE STR_R STR_E STR_C STR_U STR_R STR_S STR_I STR_O STR_N STR_EQUALS_SIGN
|
||||
|
|
|
@ -585,6 +585,7 @@ typedef struct pcre2_real_match_context {
|
|||
int (*callout)(pcre2_callout_block *, void *);
|
||||
void *callout_data;
|
||||
PCRE2_SIZE offset_limit;
|
||||
uint32_t heap_limit;
|
||||
uint32_t match_limit;
|
||||
uint32_t depth_limit;
|
||||
} pcre2_real_match_context;
|
||||
|
@ -614,6 +615,7 @@ typedef struct pcre2_real_code {
|
|||
uint32_t compile_options; /* Options passed to pcre2_compile() */
|
||||
uint32_t overall_options; /* Options after processing the pattern */
|
||||
uint32_t flags; /* Various state flags */
|
||||
uint32_t limit_heap; /* Limit set in the pattern */
|
||||
uint32_t limit_match; /* Limit set in the pattern */
|
||||
uint32_t limit_depth; /* Limit set in the pattern */
|
||||
uint32_t first_codeunit; /* Starting code unit */
|
||||
|
@ -808,9 +810,10 @@ typedef struct match_block {
|
|||
heapframe *match_frames; /* Points to vector of frames */
|
||||
heapframe *match_frames_top; /* Points after the end of the vector */
|
||||
heapframe *stack_frames; /* The original vector on the stack */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
PCRE2_SIZE heap_limit; /* As it says */
|
||||
uint32_t match_limit; /* As it says */
|
||||
uint32_t match_limit_depth; /* As it says */
|
||||
uint32_t match_call_count; /* Number of times a new frame is created */
|
||||
BOOL hitend; /* Hit the end of the subject at some point */
|
||||
BOOL hasthen; /* Pattern contains (*THEN) */
|
||||
const uint8_t *lcc; /* Points to lower casing table */
|
||||
|
|
|
@ -64,15 +64,6 @@ information, and fields within it. */
|
|||
|
||||
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
|
||||
|
||||
/* The initial frames vector for remembering backtracking points is allocated
|
||||
on the system stack, of this size (bytes). The size must be a multiple of
|
||||
sizeof(PCRE2_SPTR) in all environments, so making it a multiple of 8 is best.
|
||||
Typical frame sizes are a few hundred bytes (it depends on the number of
|
||||
capturing parentheses) so 10K handles quite a few frames. A larger vector on
|
||||
the heap is obtained for patterns that need more frames. */
|
||||
|
||||
#define START_FRAMES_SIZE 10240
|
||||
|
||||
/* Masks for identifying the public options that are permitted at match time. */
|
||||
|
||||
#define PUBLIC_MATCH_OPTIONS \
|
||||
|
@ -618,14 +609,22 @@ backtracking point. */
|
|||
MATCH_RECURSE:
|
||||
|
||||
/* Set up a new backtracking frame. If the vector is full, get a new one
|
||||
on the heap, doubling the size. */
|
||||
on the heap, doubling the size, but constrained by the heap limit. */
|
||||
|
||||
N = (heapframe *)((char *)F + frame_size);
|
||||
if (N >= mb->match_frames_top)
|
||||
{
|
||||
PCRE2_SIZE newsize = mb->frame_vector_size * 2;
|
||||
heapframe *new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
||||
heapframe *new;
|
||||
|
||||
if ((newsize / 1024) > mb->heap_limit)
|
||||
{
|
||||
PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||
if (mb->frame_vector_size == maxsize) return PCRE2_ERROR_HEAPLIMIT;
|
||||
newsize = maxsize;
|
||||
}
|
||||
|
||||
new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
|
||||
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(new, mb->match_frames, mb->frame_vector_size);
|
||||
|
||||
|
@ -802,13 +801,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
|||
Fstart_match == mb->start_subject + mb->start_offset)))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
|
||||
/* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
|
||||
the end of the subject. */
|
||||
|
||||
|
||||
if (Feptr < mb->end_subject &&
|
||||
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
|
||||
/* We have a successful match of the whole pattern. Record the result and
|
||||
then do a direct return from the function. If there is space in the offset
|
||||
vector, set any pairs that follow the highest-numbered captured string but
|
||||
|
@ -6093,13 +6092,13 @@ set up later. */
|
|||
utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
mb->partial = ((options & PCRE2_PARTIAL_HARD) != 0)? 2 :
|
||||
((options & PCRE2_PARTIAL_SOFT) != 0)? 1 : 0;
|
||||
|
||||
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
|
||||
time. */
|
||||
|
||||
if (mb->partial != 0 &&
|
||||
|
||||
/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
|
||||
time. */
|
||||
|
||||
if (mb->partial != 0 &&
|
||||
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
|
||||
we must also check that a starting offset does not point into the middle of a
|
||||
|
@ -6266,9 +6265,22 @@ correct when calling match() more than once for non-anchored patterns. */
|
|||
|
||||
frame_size = sizeof(heapframe) + ((re->top_bracket - 1) * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
|
||||
mcontext->heap_limit : re->limit_heap;
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
|
||||
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
||||
mcontext->depth_limit : re->limit_depth;
|
||||
|
||||
/* If a pattern has very many capturing parentheses, the frame size may be very
|
||||
large. Ensure that there are at least 10 available frames by getting an initial
|
||||
vector on the heap if necessary. */
|
||||
vector on the heap if necessary, except when the heap limit prevents this. Get
|
||||
fewer if possible. (The heap limit is in kilobytes.) */
|
||||
|
||||
if (frame_size <= START_FRAMES_SIZE/10)
|
||||
{
|
||||
|
@ -6278,6 +6290,11 @@ if (frame_size <= START_FRAMES_SIZE/10)
|
|||
else
|
||||
{
|
||||
mb->frame_vector_size = frame_size * 10;
|
||||
if ((mb->frame_vector_size / 1024) > mb->heap_limit)
|
||||
{
|
||||
if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
|
||||
mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
|
||||
}
|
||||
mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
|
||||
mb->memctl.memory_data);
|
||||
if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
@ -6292,14 +6309,6 @@ to avoid uninitialized memory read errors when it is copied to a new frame. */
|
|||
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
|
||||
mb->match_limit = (mcontext->match_limit < re->limit_match)?
|
||||
mcontext->match_limit : re->limit_match;
|
||||
mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
|
||||
mcontext->depth_limit : re->limit_depth;
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
mb->lcc = re->tables + lcc_offset;
|
||||
|
|
|
@ -80,6 +80,7 @@ if (where == NULL) /* Requests field length */
|
|||
case PCRE2_INFO_FIRSTCODEUNIT:
|
||||
case PCRE2_INFO_HASBACKSLASHC:
|
||||
case PCRE2_INFO_HASCRORLF:
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
case PCRE2_INFO_LASTCODETYPE:
|
||||
case PCRE2_INFO_LASTCODEUNIT:
|
||||
|
@ -171,6 +172,11 @@ switch(what)
|
|||
*((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_HEAPLIMIT:
|
||||
*((uint32_t *)where) = re->limit_heap;
|
||||
if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
|
||||
break;
|
||||
|
||||
case PCRE2_INFO_JCHANGED:
|
||||
*((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
|
||||
break;
|
||||
|
|
|
@ -212,6 +212,7 @@ static const uint8_t *character_tables = NULL;
|
|||
|
||||
static uint32_t pcre2_options = 0;
|
||||
static uint32_t process_options = 0;
|
||||
static PCRE2_SIZE heap_limit = PCRE2_UNSET;
|
||||
static uint32_t match_limit = 0;
|
||||
static uint32_t depth_limit = 0;
|
||||
|
||||
|
@ -330,7 +331,7 @@ static const char *incexname[4] = { "--include", "--exclude",
|
|||
|
||||
/* Structure for options and list of them */
|
||||
|
||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER,
|
||||
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
|
||||
OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
|
||||
|
||||
typedef struct option_item {
|
||||
|
@ -356,16 +357,17 @@ used to identify them. */
|
|||
#define N_LOFFSETS (-10)
|
||||
#define N_FOFFSETS (-11)
|
||||
#define N_LBUFFER (-12)
|
||||
#define N_M_LIMIT (-13)
|
||||
#define N_M_LIMIT_DEP (-14)
|
||||
#define N_BUFSIZE (-15)
|
||||
#define N_NOJIT (-16)
|
||||
#define N_FILE_LIST (-17)
|
||||
#define N_BINARY_FILES (-18)
|
||||
#define N_EXCLUDE_FROM (-19)
|
||||
#define N_INCLUDE_FROM (-20)
|
||||
#define N_OM_SEPARATOR (-21)
|
||||
#define N_MAX_BUFSIZE (-22)
|
||||
#define N_H_LIMIT (-13)
|
||||
#define N_M_LIMIT (-14)
|
||||
#define N_M_LIMIT_DEP (-15)
|
||||
#define N_BUFSIZE (-16)
|
||||
#define N_NOJIT (-17)
|
||||
#define N_FILE_LIST (-18)
|
||||
#define N_BINARY_FILES (-19)
|
||||
#define N_EXCLUDE_FROM (-20)
|
||||
#define N_INCLUDE_FROM (-21)
|
||||
#define N_OM_SEPARATOR (-22)
|
||||
#define N_MAX_BUFSIZE (-23)
|
||||
|
||||
static option_item optionlist[] = {
|
||||
{ OP_NODATA, N_NULL, NULL, "", "terminate options" },
|
||||
|
@ -397,6 +399,7 @@ static option_item optionlist[] = {
|
|||
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
|
||||
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
|
||||
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
|
||||
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kilobytes)" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
|
||||
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
|
||||
|
@ -525,9 +528,9 @@ pcre2grep_exit(int rc)
|
|||
{
|
||||
if (resource_error)
|
||||
{
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d or %d means that a resource limit "
|
||||
"was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_DEPTHLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
|
||||
"limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
|
||||
PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
|
||||
fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
|
||||
}
|
||||
exit(rc);
|
||||
|
@ -1647,7 +1650,7 @@ for (i = 1; p != NULL; p = p->next, i++)
|
|||
FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
|
||||
fprintf(stderr, "\n\n");
|
||||
if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
|
||||
*mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||
*mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
|
||||
resource_error = TRUE;
|
||||
if (error_count++ > 20)
|
||||
{
|
||||
|
@ -3796,7 +3799,7 @@ for (i = 1; i < argc; i++)
|
|||
/* Otherwise, deal with a single string or numeric data value. */
|
||||
|
||||
else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
|
||||
op->type != OP_OP_NUMBER)
|
||||
op->type != OP_OP_NUMBER && op->type != OP_SIZE)
|
||||
{
|
||||
*((char **)op->dataptr) = option_data;
|
||||
}
|
||||
|
@ -3804,6 +3807,7 @@ for (i = 1; i < argc; i++)
|
|||
{
|
||||
unsigned long int n = decode_number(option_data, op, longop);
|
||||
if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
|
||||
else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
|
||||
else *((int *)op->dataptr) = n;
|
||||
}
|
||||
}
|
||||
|
@ -3839,6 +3843,7 @@ if (output_text != NULL &&
|
|||
|
||||
/* Put limits into the match data block. */
|
||||
|
||||
if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
|
||||
if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
|
||||
if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
|
||||
|
||||
|
|
|
@ -588,6 +588,7 @@ static modstruct modlist[] = {
|
|||
{ "get", MOD_DAT, MOD_NN, DO(get_numbers), DO(get_names) },
|
||||
{ "getall", MOD_DAT, MOD_CTL, CTL_GETALL, DO(control) },
|
||||
{ "global", MOD_PNDP, MOD_CTL, CTL_GLOBAL, PO(control) },
|
||||
{ "heap_limit", MOD_CTM, MOD_INT, 0, MO(heap_limit) },
|
||||
{ "hex", MOD_PAT, MOD_CTL, CTL_HEXPAT, PO(control) },
|
||||
{ "info", MOD_PAT, MOD_CTL, CTL_INFO, PO(control) },
|
||||
{ "jit", MOD_PAT, MOD_IND, 7, PO(jit) },
|
||||
|
@ -1207,6 +1208,14 @@ are supported. */
|
|||
else \
|
||||
pcre2_set_depth_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_heap_limit_8(G(a,8),b); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_heap_limit_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_heap_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_match_limit_8(G(a,8),b); \
|
||||
|
@ -1643,6 +1652,12 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_depth_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_heap_limit_,BITONE)(G(a,BITONE),b); \
|
||||
else \
|
||||
G(pcre2_set_heap_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_match_limit_,BITONE)(G(a,BITONE),b); \
|
||||
|
@ -1856,6 +1871,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||
|
@ -1952,6 +1968,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||
|
@ -2048,6 +2065,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||
#define PCRE2_SET_DEPTH_LIMIT(a,b) pcre2_set_depth_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_HEAP_LIMIT(a,b) pcre2_set_heap_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||
|
@ -4040,14 +4058,28 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
{
|
||||
void *nametable;
|
||||
uint8_t *start_bits;
|
||||
BOOL match_limit_set, depth_limit_set;
|
||||
BOOL heap_limit_set, match_limit_set, depth_limit_set;
|
||||
uint32_t backrefmax, bsr_convention, capture_count, first_ctype, first_cunit,
|
||||
hasbackslashc, hascrorlf, jchanged, last_ctype, last_cunit, match_empty,
|
||||
match_limit, minlength, nameentrysize, namecount, newline_convention,
|
||||
depth_limit;
|
||||
depth_limit, heap_limit, match_limit, minlength, nameentrysize, namecount,
|
||||
newline_convention;
|
||||
|
||||
/* These info requests may return PCRE2_ERROR_UNSET. */
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_HEAPLIMIT, &heap_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
heap_limit_set = TRUE;
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_UNSET:
|
||||
heap_limit_set = FALSE;
|
||||
break;
|
||||
|
||||
default:
|
||||
return PR_ABEND;
|
||||
}
|
||||
|
||||
switch(pattern_info(PCRE2_INFO_MATCHLIMIT, &match_limit, TRUE))
|
||||
{
|
||||
case 0:
|
||||
|
@ -4106,6 +4138,9 @@ if ((pat_patctl.control & CTL_INFO) != 0)
|
|||
if (maxlookbehind > 0)
|
||||
fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
|
||||
|
||||
if (heap_limit_set)
|
||||
fprintf(outfile, "Heap limit = %u\n", heap_limit);
|
||||
|
||||
if (match_limit_set)
|
||||
fprintf(outfile, "Match limit = %u\n", match_limit);
|
||||
|
||||
|
@ -5353,10 +5388,15 @@ uint32_t max = UINT32_MAX;
|
|||
|
||||
PCRE2_SET_MATCH_LIMIT(dat_context, max);
|
||||
PCRE2_SET_DEPTH_LIMIT(dat_context, max);
|
||||
PCRE2_SET_HEAP_LIMIT(dat_context, max);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
||||
if (errnumber == PCRE2_ERROR_HEAPLIMIT)
|
||||
{
|
||||
PCRE2_SET_HEAP_LIMIT(dat_context, mid);
|
||||
}
|
||||
else if (errnumber == PCRE2_ERROR_MATCHLIMIT)
|
||||
{
|
||||
PCRE2_SET_MATCH_LIMIT(dat_context, mid);
|
||||
}
|
||||
|
@ -5393,13 +5433,23 @@ for (;;)
|
|||
capcount == PCRE2_ERROR_NOMATCH ||
|
||||
capcount == PCRE2_ERROR_PARTIAL)
|
||||
{
|
||||
/* If we've not hit the error with a heap limit less than the size of the
|
||||
initial stack frame vector, the heap is not being used, so the minimum
|
||||
limit is zero; there's no need to go on. The other limits are always
|
||||
greater than zero. */
|
||||
|
||||
if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < START_FRAMES_SIZE/1024)
|
||||
{
|
||||
fprintf(outfile, "Minimum %s limit = 0\n", msg);
|
||||
break;
|
||||
}
|
||||
if (mid == min + 1)
|
||||
{
|
||||
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
|
||||
break;
|
||||
}
|
||||
max = mid;
|
||||
mid = (min + mid)/2;
|
||||
}
|
||||
max = mid;
|
||||
mid = (min + max)/2;
|
||||
}
|
||||
else break; /* Some other error */
|
||||
}
|
||||
|
@ -6662,20 +6712,32 @@ else for (gmatched = 0;; gmatched++)
|
|||
(double)CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
/* Find the match and depth limits if requested. The match limit is not
|
||||
relevant for DFA matching and the depth limit is not relevant for JIT. */
|
||||
/* Find the heap, match and depth limits if requested. The match and heap
|
||||
limits are not relevant for DFA matching and the depth limit is not relevant
|
||||
for JIT. */
|
||||
|
||||
if ((dat_datctl.control & CTL_FINDLIMITS) != 0)
|
||||
{
|
||||
if ((dat_datctl.control & CTL_DFA) == 0)
|
||||
{
|
||||
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||
(dat_datctl.options & PCRE2_NO_JIT) != 0)
|
||||
{
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_HEAPLIMIT,
|
||||
"heap");
|
||||
}
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_MATCHLIMIT,
|
||||
"match");
|
||||
}
|
||||
else capcount = 0;
|
||||
|
||||
if (FLD(compiled_code, executable_jit) == NULL ||
|
||||
(dat_datctl.options & PCRE2_NO_JIT) != 0 ||
|
||||
(dat_datctl.control & CTL_DFA) != 0)
|
||||
{
|
||||
capcount = check_match_limit(pp, arg_ulen, PCRE2_ERROR_DEPTHLIMIT,
|
||||
"depth");
|
||||
}
|
||||
}
|
||||
|
||||
/* Otherwise just run a single match, setting up a callout if required (the
|
||||
|
@ -7402,6 +7464,8 @@ printf(" \\C is supported\n");
|
|||
printf(" Internal link size = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
||||
printf(" Parentheses nest limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_HEAPLIMIT, &optval);
|
||||
printf(" Default heap limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
|
||||
printf(" Default match limit = %d\n", optval);
|
||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_DEPTHLIMIT, &optval);
|
||||
|
|
|
@ -12,11 +12,13 @@ Starting code units: a z
|
|||
Last code unit = 'z'
|
||||
Subject length lower bound = 2
|
||||
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazzbbbbbb\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 7
|
||||
0: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaazz
|
||||
1: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
|
||||
aaaaaaaaaaaaaz\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 20481
|
||||
Minimum depth limit = 30
|
||||
No match
|
||||
|
@ -26,6 +28,7 @@ Capturing subpattern count = 1
|
|||
May match empty string
|
||||
Subject length lower bound = 0
|
||||
/* this is a C style comment */\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 64
|
||||
Minimum depth limit = 7
|
||||
0: /* this is a C style comment */
|
||||
|
@ -33,21 +36,25 @@ Minimum depth limit = 7
|
|||
|
||||
/^(?>a)++/
|
||||
aa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 5
|
||||
Minimum depth limit = 3
|
||||
0: aa
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 12
|
||||
Minimum depth limit = 3
|
||||
0: aaaaaaaaa
|
||||
|
||||
/(a)(?1)++/
|
||||
aa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 5
|
||||
0: aa
|
||||
1: a
|
||||
aaaaaaaaa\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 21
|
||||
Minimum depth limit = 5
|
||||
0: aaaaaaaaa
|
||||
|
@ -55,30 +62,35 @@ Minimum depth limit = 5
|
|||
|
||||
/a(?:.)*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 24
|
||||
Minimum depth limit = 3
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 66
|
||||
Minimum depth limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/a(?:.(*THEN:ABC))*?a/ims
|
||||
abbbbbbbbbbbbbbbbbbbbba\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 66
|
||||
Minimum depth limit = 45
|
||||
0: abbbbbbbbbbbbbbbbbbbbba
|
||||
|
||||
/^(?>a+)(?>b+)(?>c+)(?>d+)(?>e+)/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 7
|
||||
Minimum depth limit = 7
|
||||
0: aabbccddee
|
||||
|
||||
/^(?>(a+))(?>(b+))(?>(c+))(?>(d+))(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 12
|
||||
Minimum depth limit = 12
|
||||
0: aabbccddee
|
||||
|
@ -90,6 +102,7 @@ Minimum depth limit = 12
|
|||
|
||||
/^(?>(a+))(?>b+)(?>(c+))(?>d+)(?>(e+))/
|
||||
aabbccddee\=find_limits
|
||||
Minimum heap limit = 0
|
||||
Minimum match limit = 10
|
||||
Minimum depth limit = 10
|
||||
0: aabbccddee
|
||||
|
|
|
@ -15609,7 +15609,7 @@ Last code unit = 'c'
|
|||
Subject length lower bound = 4
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -64: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
Error -2: partial match
|
||||
Error -1: no match
|
||||
|
|
Loading…
Reference in New Issue