Implement pcre2_set_max_pattern_length()
This commit is contained in:
parent
37e9ea2331
commit
b7ee0684e3
|
@ -263,6 +263,9 @@ result of the use of \K).
|
|||
76. Check the length of subpattern names and the names in (*MARK:xx) etc.
|
||||
dynamically to avoid the possibility of integer overflow.
|
||||
|
||||
77. Implement pcre2_set_max_pattern_length() so that programs can restrict the
|
||||
size of patterns that they are prepared to handle.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -210,12 +210,15 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
<tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
|
||||
<td> Set the maximum length of pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
|
||||
<td> Set the newline convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_max_pattern_length specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_max_pattern_length man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include <pcre2.h></b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets, in a compile context, the maximum length (in code units) of
|
||||
the pattern that can be compiled. The result is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -143,6 +143,10 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b> const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -614,6 +618,7 @@ of the following compile-time parameters:
|
|||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
An external function for stack checking
|
||||
</pre>
|
||||
A compile context is also required if you are using custom memory management.
|
||||
|
@ -652,6 +657,15 @@ interpreted matching functions, <i>pcre2_match()</i> and
|
|||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||
argument is a general context. This function builds a set of character tables
|
||||
in the current locale.
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This sets a maximum length, in code units, for the pattern string that is to be
|
||||
compiled. If the pattern is longer, an error is generated. This facility is
|
||||
provided so that applications that accept patterns from external sources can
|
||||
limit their size. The default is the largest number that a PCRE2_SIZE variable
|
||||
can hold, which is effectively unlimited.
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -2622,7 +2636,9 @@ same number causes an error at compile time.
|
|||
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
||||
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \K item in a lookahead in the pattern causes the match to end before
|
||||
it starts are not supported, and give rise to an error return.
|
||||
</P>
|
||||
<P>
|
||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||
|
@ -2735,8 +2751,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
|||
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||
errors in the replacement string, with more particular errors being
|
||||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
|
||||
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found),
|
||||
PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
|
||||
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
|
||||
PCRE2 errors, a text message that describes the error can be obtained by
|
||||
calling <b>pcre2_get_error_message()</b>.
|
||||
</P>
|
||||
|
@ -3015,7 +3032,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -32,6 +32,11 @@ However, the speed of execution is slower. In the 32-bit library, the internal
|
|||
linkage size is always 4.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a source pattern string is essentially unlimited; it is
|
||||
the largest number a PCRE2_SIZE variable can hold. However, the program that
|
||||
calls <b>pcre2_compile()</b> can specify a smaller limit.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length (in code units) of a subject string is one less than the
|
||||
largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned
|
||||
integer type, usually defined as size_t. Its maximum value (that is
|
||||
|
@ -50,6 +55,9 @@ documentation.
|
|||
All values in repeating quantifiers must be less than 65536.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a lookbehind assertion is 65535 characters.
|
||||
</P>
|
||||
<P>
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
|
@ -85,9 +93,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 25 November 2014
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -2512,7 +2512,8 @@ For example:
|
|||
(?(VERSION>=10.4)yes|no)
|
||||
</pre>
|
||||
This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or
|
||||
"no" otherwise.
|
||||
"no" otherwise. The fractional part of the version number may not contain more
|
||||
than two digits.
|
||||
</P>
|
||||
<br><b>
|
||||
Assertion conditions
|
||||
|
@ -3358,7 +3359,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 01 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -266,9 +266,11 @@ header file, of which REG_NOMATCH is the "expected" failure code.
|
|||
The <b>regerror()</b> function maps a non-zero errorcode from either
|
||||
<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
|
||||
NULL, the error should have arisen from the use of that structure. A message
|
||||
terminated by a binary zero is placed in <i>errbuf</i>. The length of the
|
||||
message, including the zero, is limited to <i>errbuf_size</i>. The yield of the
|
||||
function is the size of buffer needed to hold the whole message.
|
||||
terminated by a binary zero is placed in <i>errbuf</i>. If the buffer is too
|
||||
short, only the first <i>errbuf_size</i> - 1 characters of the error message are
|
||||
used. The yield of the function is the size of buffer needed to hold the whole
|
||||
message, including the terminating zero. This value is greater than
|
||||
<i>errbuf_size</i> if the message was truncated.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
|
||||
<P>
|
||||
|
@ -287,7 +289,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 September 2015
|
||||
Last updated: 30 October 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -41,12 +41,12 @@ If you are running an application that uses a large number of regular
|
|||
expression patterns, it may be useful to store them in a precompiled form
|
||||
instead of having to compile them every time the application is run. However,
|
||||
if you are using the just-in-time optimization feature, it is not possible to
|
||||
save and reload the JIT data, because it is position-dependent. In addition,
|
||||
the host on which the patterns are reloaded must be running the same version of
|
||||
PCRE2, with the same code unit width, and must also have the same endianness,
|
||||
pointer width and PCRE2_SIZE type. For example, patterns compiled on a 32-bit
|
||||
system using PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor
|
||||
can they be reloaded using the 8-bit library.
|
||||
save and reload the JIT data, because it is position-dependent. The host on
|
||||
which the patterns are reloaded must be running the same version of PCRE2, with
|
||||
the same code unit width, and must also have the same endianness, pointer width
|
||||
and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
|
||||
PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
|
||||
reloaded using the 8-bit library.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">SAVING COMPILED PATTERNS</a><br>
|
||||
<P>
|
||||
|
@ -153,10 +153,15 @@ on a system with different endianness.
|
|||
</P>
|
||||
<P>
|
||||
Decoded patterns can be used for matching in the usual way, and must be freed
|
||||
by calling <b>pcre2_code_free()</b> as normal. A single copy of the character
|
||||
tables is used by all the decoded patterns. A reference count is used to
|
||||
by calling <b>pcre2_code_free()</b>. However, be aware that there is a potential
|
||||
race issue if you are using multiple patterns that were decoded from a single
|
||||
byte stream in a multithreaded application. A single copy of the character
|
||||
tables is used by all the decoded patterns and a reference count is used to
|
||||
arrange for its memory to be automatically freed when the last pattern is
|
||||
freed.
|
||||
freed, but there is no locking on this reference count. Therefore, if you want
|
||||
to call <b>pcre2_code_free()</b> for these patterns in different threads, you
|
||||
must arrange your own locking, and ensure that <b>pcre2_code_free()</b> cannot
|
||||
be called by two threads at the same time.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern was processed by <b>pcre2_jit_compile()</b> before being
|
||||
|
@ -175,7 +180,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 20 January 2015
|
||||
Last updated: 03 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -266,9 +266,9 @@ Each subject line is matched separately and independently. If you want to do
|
|||
multi-line matches, you have to use the \n escape sequence (or \r or \r\n,
|
||||
etc., depending on the newline setting) in a single line of input to encode the
|
||||
newline sequences. There is no limit on the length of subject lines; the input
|
||||
buffer is automatically extended if it is too small. There is a replication
|
||||
feature that makes it possible to generate long subject lines without having to
|
||||
supply them explicitly.
|
||||
buffer is automatically extended if it is too small. There are replication
|
||||
features that make it possible to generate long repetitive pattern or subject
|
||||
lines without having to supply them explicitly.
|
||||
</P>
|
||||
<P>
|
||||
An empty line or the end of the file signals the end of the subject lines for a
|
||||
|
@ -500,10 +500,10 @@ a real empty line terminates the data input.
|
|||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||
<P>
|
||||
There are three types of modifier that can appear in pattern lines, two of
|
||||
which may also be used in a <b>#pattern</b> command. A pattern's modifier list
|
||||
can add to or override default modifiers that were set by a previous
|
||||
<b>#pattern</b> command.
|
||||
There are several types of modifier that can appear in pattern lines. Except
|
||||
where noted below, they may also be used in <b>#pattern</b> commands. A
|
||||
pattern's modifier list can add to or override default modifiers that were set
|
||||
by a previous <b>#pattern</b> command.
|
||||
<a name="optionmodifiers"></a></P>
|
||||
<br><b>
|
||||
Setting compilation options
|
||||
|
@ -564,6 +564,7 @@ about the pattern:
|
|||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
max_pattern_length=<n> set the maximum pattern length
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
null_context compile with a NULL context
|
||||
|
@ -670,6 +671,34 @@ PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the
|
|||
actual length of the pattern is passed.
|
||||
</P>
|
||||
<br><b>
|
||||
Generating long repetitive patterns
|
||||
</b><br>
|
||||
<P>
|
||||
Some tests use long patterns that are very repetitive. Instead of creating a
|
||||
very long input line for such a pattern, you can use a special repetition
|
||||
feature, similar to the one described for subject lines above. If the
|
||||
<b>expand</b> modifier is present on a pattern, parts of the pattern that have
|
||||
the form
|
||||
<pre>
|
||||
\[<characters>]{<count>}
|
||||
</pre>
|
||||
are expanded before the pattern is passed to <b>pcre2_compile()</b>. For
|
||||
example, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction
|
||||
cannot be nested. An initial "\[" sequence is recognized only if "]{" followed
|
||||
by decimal digits and "}" is found later in the pattern. If not, the characters
|
||||
remain in the pattern unaltered.
|
||||
</P>
|
||||
<P>
|
||||
If part of an expanded pattern looks like an expansion, but is really part of
|
||||
the actual pattern, unwanted expansion can be avoided by giving two values in
|
||||
the quantifier. For example, \[AB]{6000,6000} is not recognized as an
|
||||
expansion item.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>info</b> modifier is set on an expanded pattern, the result of the
|
||||
expansion is included in the information that is output.
|
||||
</P>
|
||||
<br><b>
|
||||
JIT compilation
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -780,6 +809,15 @@ sets its own default of 220, which is required for running the standard test
|
|||
suite.
|
||||
</P>
|
||||
<br><b>
|
||||
Limiting the pattern length
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>max_pattern_length</b> modifier sets a limit, in code units, to the
|
||||
length of pattern that <b>pcre2_compile()</b> will accept. Breaching the limit
|
||||
causes a compilation error. The default is the largest number a PCRE2_SIZE
|
||||
variable can hold (essentially unlimited).
|
||||
</P>
|
||||
<br><b>
|
||||
Using the POSIX wrapper API
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -798,6 +836,16 @@ modifiers set options for the <b>regcomp()</b> function:
|
|||
ucp REG_UCP ) the POSIX standard
|
||||
utf REG_UTF8 )
|
||||
</pre>
|
||||
The <b>regerror_buffsize</b> modifier specifies a size for the error buffer that
|
||||
is passed to <b>regerror()</b> in the event of a compilation error. For example:
|
||||
<pre>
|
||||
/abc/posix,regerror_buffsize=20
|
||||
</pre>
|
||||
This provides a means of testing the behaviour of <b>regerror()</b> when the
|
||||
buffer is too small for the error message. If this modifier has not been set, a
|
||||
large buffer is used.
|
||||
</P>
|
||||
<P>
|
||||
The <b>aftertext</b> and <b>allaftertext</b> subject modifiers work as described
|
||||
below. All other modifiers cause an error.
|
||||
</P>
|
||||
|
@ -840,8 +888,9 @@ Setting certain match controls
|
|||
<P>
|
||||
The following modifiers are really subject modifiers, and are described below.
|
||||
However, they may be included in a pattern's modifier list, in which case they
|
||||
are applied to every subject line that is processed with that pattern. They do
|
||||
not affect the compilation process.
|
||||
are applied to every subject line that is processed with that pattern. They may
|
||||
not appear in <b>#pattern</b> commands. These modifiers do not affect the
|
||||
compilation process.
|
||||
<pre>
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
|
@ -1574,7 +1623,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 October 2015
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -210,12 +210,15 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
<tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
|
||||
<td> Set the maximum length of pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
|
||||
<td> Set the newline convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
|
|
2334
doc/pcre2.txt
2334
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,28 @@
|
|||
.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function sets, in a compile context, the maximum length (in code units) of
|
||||
the pattern that can be compiled. The result is always zero.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
|
||||
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -90,6 +90,9 @@ document for an overview of all the PCRE2 documentation.
|
|||
.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
|
@ -567,6 +570,7 @@ of the following compile-time parameters:
|
|||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
An external function for stack checking
|
||||
.sp
|
||||
A compile context is also required if you are using custom memory management.
|
||||
|
@ -610,6 +614,17 @@ argument is a general context. This function builds a set of character tables
|
|||
in the current locale.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This sets a maximum length, in code units, for the pattern string that is to be
|
||||
compiled. If the pattern is longer, an error is generated. This facility is
|
||||
provided so that applications that accept patterns from external sources can
|
||||
limit their size. The default is the largest number that a PCRE2_SIZE variable
|
||||
can hold, which is effectively unlimited.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
|
@ -3069,6 +3084,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 November 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2LIMITS 3 "03 November 2015" "PCRE2 10.21"
|
||||
.TH PCRE2LIMITS 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "SIZE AND OTHER LIMITATIONS"
|
||||
|
@ -20,6 +20,10 @@ documentation for details. In these cases the limit is substantially larger.
|
|||
However, the speed of execution is slower. In the 32-bit library, the internal
|
||||
linkage size is always 4.
|
||||
.P
|
||||
The maximum length of a source pattern string is essentially unlimited; it is
|
||||
the largest number a PCRE2_SIZE variable can hold. However, the program that
|
||||
calls \fBpcre2_compile()\fP can specify a smaller limit.
|
||||
.P
|
||||
The maximum length (in code units) of a subject string is one less than the
|
||||
largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned
|
||||
integer type, usually defined as size_t. Its maximum value (that is
|
||||
|
@ -71,6 +75,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 November 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "30 October 2015" "PCRE 10.21"
|
||||
.TH PCRE2TEST 1 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -528,6 +528,7 @@ about the pattern:
|
|||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
max_pattern_length=<n> set the maximum pattern length
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
null_context compile with a NULL context
|
||||
|
@ -767,6 +768,15 @@ sets its own default of 220, which is required for running the standard test
|
|||
suite.
|
||||
.
|
||||
.
|
||||
.SS "Limiting the pattern length"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmax_pattern_length\fP modifier sets a limit, in code units, to the
|
||||
length of pattern that \fBpcre2_compile()\fP will accept. Breaching the limit
|
||||
causes a compilation error. The default is the largest number a PCRE2_SIZE
|
||||
variable can hold (essentially unlimited).
|
||||
.
|
||||
.
|
||||
.SS "Using the POSIX wrapper API"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1596,6 +1606,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 October 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -396,6 +396,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -616,6 +618,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
|
|
|
@ -396,6 +396,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -616,6 +618,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
|
|
|
@ -583,7 +583,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87 };
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88 };
|
||||
|
||||
/* Error codes that correspond to negative error codes returned by
|
||||
find_fixedlength(). */
|
||||
|
@ -2988,7 +2988,7 @@ for (; ptr < cb->end_pattern; ptr++)
|
|||
if ((unsigned int)arglen > MAX_MARK)
|
||||
{
|
||||
*errorcodeptr = ERR76;
|
||||
*ptrptr = ptr;
|
||||
*ptrptr = ptr;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -8128,10 +8128,24 @@ if (ccontext == NULL)
|
|||
/* A zero-terminated pattern is indicated by the special length value
|
||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||
to ensure that it is always possible to look one code unit beyond the end of
|
||||
the pattern's characters. */
|
||||
the pattern's characters. In both cases, check whether the pattern is overlong. */
|
||||
|
||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
||||
if (patlen == PCRE2_ZERO_TERMINATED)
|
||||
{
|
||||
patlen = PRIV(strlen)(pattern);
|
||||
if (patlen > ccontext->max_pattern_length)
|
||||
{
|
||||
*errorptr = ERR88;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (patlen > ccontext->max_pattern_length)
|
||||
{
|
||||
*errorptr = ERR88;
|
||||
return NULL;
|
||||
}
|
||||
if (patlen < COPIED_PATTERN_SIZE)
|
||||
copied_pattern = stack_copied_pattern;
|
||||
else
|
||||
|
|
|
@ -131,13 +131,14 @@ return gcontext;
|
|||
when no context is supplied to the compile function. */
|
||||
|
||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
NULL,
|
||||
NULL,
|
||||
PRIV(default_tables),
|
||||
BSR_DEFAULT,
|
||||
NEWLINE_DEFAULT,
|
||||
PARENS_NEST_LIMIT };
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT }; /* As it says */
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
@ -169,7 +170,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
|||
#endif
|
||||
NULL,
|
||||
NULL,
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
PCRE2_UNSET, /* Offset limit */
|
||||
MATCH_LIMIT,
|
||||
MATCH_LIMIT_RECURSION };
|
||||
|
||||
|
@ -295,6 +296,13 @@ switch(value)
|
|||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
|
|
|
@ -172,6 +172,7 @@ static const char compile_error_texts[] =
|
|||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -562,6 +562,7 @@ typedef struct pcre2_real_compile_context {
|
|||
int (*stack_guard)(uint32_t, void *);
|
||||
void *stack_guard_data;
|
||||
const uint8_t *tables;
|
||||
PCRE2_SIZE max_pattern_length;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
|
|
|
@ -540,6 +540,7 @@ static modstruct modlist[] = {
|
|||
{ "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
|
||||
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
|
||||
{ "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
|
||||
{ "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
|
||||
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
|
||||
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
|
||||
{ "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
|
||||
|
@ -1094,6 +1095,14 @@ are supported. */
|
|||
else \
|
||||
pcre2_set_match_limit_32(G(a,32),b)
|
||||
|
||||
/* Three-way dispatch on test_mode: calls the _8, _16 or _32 variant of
pcre2_set_max_pattern_length. Any test_mode other than PCRE8_MODE or
PCRE16_MODE takes the _32 branch. The expansion is a bare if/else chain with no
trailing semicolon, so the invocation must supply one.
NOTE(review): because this is not wrapped in do { } while (0), using it as the
body of an unbraced if that has its own else would mis-bind that else. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_max_pattern_length_8(G(a,8),b); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_max_pattern_length_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_offset_limit_8(G(a,8),b); \
|
||||
|
@ -1502,6 +1511,12 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
/* Two-way dispatch on test_mode: when test_mode equals the mode constant built
from PCRE, BITONE and _MODE, the BITONE-suffixed pcre2_set_max_pattern_length
function is called; otherwise the BITTWO-suffixed one is. G() presumably pastes
its two arguments into one token - confirm against its definition. The
expansion is a bare if/else with no trailing semicolon, so the invocation must
supply one. */
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
|
||||
else \
|
||||
G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
|
||||
|
@ -1706,6 +1721,7 @@ the three different cases. */
|
|||
/* Direct forms: each macro expands to a single call of the corresponding _8
library function, with no test_mode check. The first argument is passed through
G(a,8), which presumably selects the 8-bit variant of that object - confirm
against the definition of G. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
|
||||
|
@ -1798,6 +1814,7 @@ the three different cases. */
|
|||
/* Direct forms: each macro expands to a single call of the corresponding _16
library function, with no test_mode check. The first argument is passed through
G(a,16), which presumably selects the 16-bit variant of that object - confirm
against the definition of G. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
|
||||
|
@ -1890,6 +1907,7 @@ the three different cases. */
|
|||
/* Direct forms: each macro expands to a single call of the corresponding _32
library function, with no test_mode check. The first argument is passed through
G(a,32), which presumably selects the 32-bit variant of that object - confirm
against the definition of G. */
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
|
||||
|
|
|
@ -4603,4 +4603,14 @@ B)x/alt_verbnames,mark
|
|||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
|
||||
|
||||
# These two use zero-termination
|
||||
/abcd/max_pattern_length=3
|
||||
|
||||
/abc/max_pattern_length=3
|
||||
|
||||
# These two, being hex, pass the length
|
||||
/abcdefab/hex,max_pattern_length=3
|
||||
|
||||
/abcdef/hex,max_pattern_length=3
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -14699,4 +14699,16 @@ Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characte
|
|||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
|
||||
|
||||
# These two use zero-termination
|
||||
/abcd/max_pattern_length=3
|
||||
Failed: error 188 at offset 0: pattern string is longer than the limit set by the application
|
||||
|
||||
/abc/max_pattern_length=3
|
||||
|
||||
# These two, being hex, pass the length
|
||||
/abcdefab/hex,max_pattern_length=3
|
||||
Failed: error 188 at offset 0: pattern string is longer than the limit set by the application
|
||||
|
||||
/abcdef/hex,max_pattern_length=3
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue