Implement pcre2_set_max_pattern_length()
This commit is contained in:
parent
37e9ea2331
commit
b7ee0684e3
|
@ -263,6 +263,9 @@ result of the use of \K).
|
|||
76. Check the length of subpattern names and the names in (*MARK:xx) etc.
|
||||
dynamically to avoid the possibility of integer overflow.
|
||||
|
||||
77. Implement pcre2_set_max_pattern_length() so that programs can restrict the
|
||||
size of patterns that they are prepared to handle.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
|
|
@ -210,12 +210,15 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
<tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
|
||||
<td> Set the maximum length of pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
|
||||
<td> Set the newline convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>pcre2_set_max_pattern_length specification</title>
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||
<h1>pcre2_set_max_pattern_length man page</h1>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
||||
<p>
|
||||
This page is part of the PCRE2 HTML documentation. It was generated
|
||||
automatically from the original man page. If there is any nonsense in it,
|
||||
please consult the man page, in case the conversion went wrong.
|
||||
<br>
|
||||
<br><b>
|
||||
SYNOPSIS
|
||||
</b><br>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
</P>
|
||||
<P>
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
</P>
|
||||
<br><b>
|
||||
DESCRIPTION
|
||||
</b><br>
|
||||
<P>
|
||||
This function sets, in a compile context, the maximum length (in code units) of
|
||||
the pattern that can be compiled. The result is always zero.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||
page and a description of the POSIX API in the
|
||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||
page.
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
</p>
|
|
@ -143,6 +143,10 @@ document for an overview of all the PCRE2 documentation.
|
|||
<b> const unsigned char *<i>tables</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -614,6 +618,7 @@ of the following compile-time parameters:
|
|||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
An external function for stack checking
|
||||
</pre>
|
||||
A compile context is also required if you are using custom memory management.
|
||||
|
@ -652,6 +657,15 @@ interpreted matching functions, <i>pcre2_match()</i> and
|
|||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||
argument is a general context. This function builds a set of character tables
|
||||
in the current locale.
|
||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||
<br>
|
||||
<br>
|
||||
This sets a maximum length, in code units, for the pattern string that is to be
|
||||
compiled. If the pattern is longer, an error is generated. This facility is
|
||||
provided so that applications that accept patterns from external sources can
|
||||
limit their size. The default is the largest number that a PCRE2_SIZE variable
|
||||
can hold, which is effectively unlimited.
|
||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||
<b> uint32_t <i>value</i>);</b>
|
||||
<br>
|
||||
|
@ -2622,7 +2636,9 @@ same number causes an error at compile time.
|
|||
This function calls <b>pcre2_match()</b> and then makes a copy of the subject
|
||||
string in <i>outputbuffer</i>, replacing the part that was matched with the
|
||||
<i>replacement</i> string, whose length is supplied in <b>rlength</b>. This can
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \K item in a lookahead in the pattern causes the match to end before
|
||||
it starts are not supported, and give rise to an error return.
|
||||
</P>
|
||||
<P>
|
||||
The first seven arguments of <b>pcre2_substitute()</b> are the same as for
|
||||
|
@ -2735,8 +2751,9 @@ are passed straight back. PCRE2_ERROR_NOMEMORY is returned if the output buffer
|
|||
is not big enough. PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax
|
||||
errors in the replacement string, with more particular errors being
|
||||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence),
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found), and
|
||||
PCRE2_BADSUBSTITUTION (syntax error in extended group substitution). As for all
|
||||
PCRE2_ERROR_REPMISSING_BRACE (closing curly bracket not found),
|
||||
PCRE2_ERROR_BADSUBSTITUTION (syntax error in extended group substitution), and
|
||||
PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started). As for all
|
||||
PCRE2 errors, a text message that describes the error can be obtained by
|
||||
calling <b>pcre2_get_error_message()</b>.
|
||||
</P>
|
||||
|
@ -3015,7 +3032,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -32,6 +32,11 @@ However, the speed of execution is slower. In the 32-bit library, the internal
|
|||
linkage size is always 4.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a source pattern string is essentially unlimited; it is
|
||||
the largest number a PCRE2_SIZE variable can hold. However, the program that
|
||||
calls <b>pcre2_compile()</b> can specify a smaller limit.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length (in code units) of a subject string is one less than the
|
||||
largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned
|
||||
integer type, usually defined as size_t. Its maximum value (that is
|
||||
|
@ -50,6 +55,9 @@ documentation.
|
|||
All values in repeating quantifiers must be less than 65536.
|
||||
</P>
|
||||
<P>
|
||||
The maximum length of a lookbehind assertion is 65535 characters.
|
||||
</P>
|
||||
<P>
|
||||
There is no limit to the number of parenthesized subpatterns, but there can be
|
||||
no more than 65535 capturing subpatterns. There is, however, a limit to the
|
||||
depth of nesting of parenthesized subpatterns of all kinds. This is imposed in
|
||||
|
@ -85,9 +93,9 @@ Cambridge, England.
|
|||
REVISION
|
||||
</b><br>
|
||||
<P>
|
||||
Last updated: 25 November 2014
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
<p>
|
||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||
|
|
|
@ -2512,7 +2512,8 @@ For example:
|
|||
(?(VERSION>=10.4)yes|no)
|
||||
</pre>
|
||||
This pattern matches "yes" if the PCRE2 version is greater than or equal to 10.4, or
|
||||
"no" otherwise.
|
||||
"no" otherwise. The fractional part of the version number may not contain more
|
||||
than two digits.
|
||||
</P>
|
||||
<br><b>
|
||||
Assertion conditions
|
||||
|
@ -3358,7 +3359,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 01 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -266,9 +266,11 @@ header file, of which REG_NOMATCH is the "expected" failure code.
|
|||
The <b>regerror()</b> function maps a non-zero errorcode from either
|
||||
<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
|
||||
NULL, the error should have arisen from the use of that structure. A message
|
||||
terminated by a binary zero is placed in <i>errbuf</i>. The length of the
|
||||
message, including the zero, is limited to <i>errbuf_size</i>. The yield of the
|
||||
function is the size of buffer needed to hold the whole message.
|
||||
terminated by a binary zero is placed in <i>errbuf</i>. If the buffer is too
|
||||
short, only the first <i>errbuf_size</i> - 1 characters of the error message are
|
||||
used. The yield of the function is the size of buffer needed to hold the whole
|
||||
message, including the terminating zero. This value is greater than
|
||||
<i>errbuf_size</i> if the message was truncated.
|
||||
</P>
|
||||
<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
|
||||
<P>
|
||||
|
@ -287,7 +289,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 03 September 2015
|
||||
Last updated: 30 October 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -41,12 +41,12 @@ If you are running an application that uses a large number of regular
|
|||
expression patterns, it may be useful to store them in a precompiled form
|
||||
instead of having to compile them every time the application is run. However,
|
||||
if you are using the just-in-time optimization feature, it is not possible to
|
||||
save and reload the JIT data, because it is position-dependent. In addition,
|
||||
the host on which the patterns are reloaded must be running the same version of
|
||||
PCRE2, with the same code unit width, and must also have the same endianness,
|
||||
pointer width and PCRE2_SIZE type. For example, patterns compiled on a 32-bit
|
||||
system using PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor
|
||||
can they be reloaded using the 8-bit library.
|
||||
save and reload the JIT data, because it is position-dependent. The host on
|
||||
which the patterns are reloaded must be running the same version of PCRE2, with
|
||||
the same code unit width, and must also have the same endianness, pointer width
|
||||
and PCRE2_SIZE type. For example, patterns compiled on a 32-bit system using
|
||||
PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor can they be
|
||||
reloaded using the 8-bit library.
|
||||
</P>
|
||||
<br><a name="SEC2" href="#TOC1">SAVING COMPILED PATTERNS</a><br>
|
||||
<P>
|
||||
|
@ -153,10 +153,15 @@ on a system with different endianness.
|
|||
</P>
|
||||
<P>
|
||||
Decoded patterns can be used for matching in the usual way, and must be freed
|
||||
by calling <b>pcre2_code_free()</b> as normal. A single copy of the character
|
||||
tables is used by all the decoded patterns. A reference count is used to
|
||||
by calling <b>pcre2_code_free()</b>. However, be aware that there is a potential
|
||||
race issue if you are using multiple patterns that were decoded from a single
|
||||
byte stream in a multithreaded application. A single copy of the character
|
||||
tables is used by all the decoded patterns and a reference count is used to
|
||||
arrange for its memory to be automatically freed when the last pattern is
|
||||
freed.
|
||||
freed, but there is no locking on this reference count. Therefore, if you want
|
||||
to call <b>pcre2_code_free()</b> for these patterns in different threads, you
|
||||
must arrange your own locking, and ensure that <b>pcre2_code_free()</b> cannot
|
||||
be called by two threads at the same time.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern was processed by <b>pcre2_jit_compile()</b> before being
|
||||
|
@ -175,7 +180,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC5" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 20 January 2015
|
||||
Last updated: 03 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -266,9 +266,9 @@ Each subject line is matched separately and independently. If you want to do
|
|||
multi-line matches, you have to use the \n escape sequence (or \r or \r\n,
|
||||
etc., depending on the newline setting) in a single line of input to encode the
|
||||
newline sequences. There is no limit on the length of subject lines; the input
|
||||
buffer is automatically extended if it is too small. There is a replication
|
||||
feature that makes it possible to generate long subject lines without having to
|
||||
supply them explicitly.
|
||||
buffer is automatically extended if it is too small. There are replication
|
||||
features that make it possible to generate long repetitive pattern or subject
|
||||
lines without having to supply them explicitly.
|
||||
</P>
|
||||
<P>
|
||||
An empty line or the end of the file signals the end of the subject lines for a
|
||||
|
@ -500,10 +500,10 @@ a real empty line terminates the data input.
|
|||
</P>
|
||||
<br><a name="SEC10" href="#TOC1">PATTERN MODIFIERS</a><br>
|
||||
<P>
|
||||
There are three types of modifier that can appear in pattern lines, two of
|
||||
which may also be used in a <b>#pattern</b> command. A pattern's modifier list
|
||||
can add to or override default modifiers that were set by a previous
|
||||
<b>#pattern</b> command.
|
||||
There are several types of modifier that can appear in pattern lines. Except
|
||||
where noted below, they may also be used in <b>#pattern</b> commands. A
|
||||
pattern's modifier list can add to or override default modifiers that were set
|
||||
by a previous <b>#pattern</b> command.
|
||||
<a name="optionmodifiers"></a></P>
|
||||
<br><b>
|
||||
Setting compilation options
|
||||
|
@ -564,6 +564,7 @@ about the pattern:
|
|||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
max_pattern_length=<n> set the maximum pattern length
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
null_context compile with a NULL context
|
||||
|
@ -670,6 +671,34 @@ PCRE2_ZERO_TERMINATED. However, for patterns specified in hexadecimal, the
|
|||
actual length of the pattern is passed.
|
||||
</P>
|
||||
<br><b>
|
||||
Generating long repetitive patterns
|
||||
</b><br>
|
||||
<P>
|
||||
Some tests use long patterns that are very repetitive. Instead of creating a
|
||||
very long input line for such a pattern, you can use a special repetition
|
||||
feature, similar to the one described for subject lines above. If the
|
||||
<b>expand</b> modifier is present on a pattern, parts of the pattern that have
|
||||
the form
|
||||
<pre>
|
||||
\[&lt;characters&gt;]{&lt;count&gt;}
|
||||
</pre>
|
||||
are expanded before the pattern is passed to <b>pcre2_compile()</b>. For
|
||||
example, \[AB]{6000} is expanded to "ABAB..." ("AB" repeated 6000 times). This construction
|
||||
cannot be nested. An initial "\[" sequence is recognized only if "]{" followed
|
||||
by decimal digits and "}" is found later in the pattern. If not, the characters
|
||||
remain in the pattern unaltered.
|
||||
</P>
|
||||
<P>
|
||||
If part of an expanded pattern looks like an expansion, but is really part of
|
||||
the actual pattern, unwanted expansion can be avoided by giving two values in
|
||||
the quantifier. For example, \[AB]{6000,6000} is not recognized as an
|
||||
expansion item.
|
||||
</P>
|
||||
<P>
|
||||
If the <b>info</b> modifier is set on an expanded pattern, the result of the
|
||||
expansion is included in the information that is output.
|
||||
</P>
|
||||
<br><b>
|
||||
JIT compilation
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -780,6 +809,15 @@ sets its own default of 220, which is required for running the standard test
|
|||
suite.
|
||||
</P>
|
||||
<br><b>
|
||||
Limiting the pattern length
|
||||
</b><br>
|
||||
<P>
|
||||
The <b>max_pattern_length</b> modifier sets a limit, in code units, to the
|
||||
length of pattern that <b>pcre2_compile()</b> will accept. Breaching the limit
|
||||
causes a compilation error. The default is the largest number a PCRE2_SIZE
|
||||
variable can hold (essentially unlimited).
|
||||
</P>
|
||||
<br><b>
|
||||
Using the POSIX wrapper API
|
||||
</b><br>
|
||||
<P>
|
||||
|
@ -798,6 +836,16 @@ modifiers set options for the <b>regcomp()</b> function:
|
|||
ucp REG_UCP ) the POSIX standard
|
||||
utf REG_UTF8 )
|
||||
</pre>
|
||||
The <b>regerror_buffsize</b> modifier specifies a size for the error buffer that
|
||||
is passed to <b>regerror()</b> in the event of a compilation error. For example:
|
||||
<pre>
|
||||
/abc/posix,regerror_buffsize=20
|
||||
</pre>
|
||||
This provides a means of testing the behaviour of <b>regerror()</b> when the
|
||||
buffer is too small for the error message. If this modifier has not been set, a
|
||||
large buffer is used.
|
||||
</P>
|
||||
<P>
|
||||
The <b>aftertext</b> and <b>allaftertext</b> subject modifiers work as described
|
||||
below. All other modifiers cause an error.
|
||||
</P>
|
||||
|
@ -840,8 +888,9 @@ Setting certain match controls
|
|||
<P>
|
||||
The following modifiers are really subject modifiers, and are described below.
|
||||
However, they may be included in a pattern's modifier list, in which case they
|
||||
are applied to every subject line that is processed with that pattern. They do
|
||||
not affect the compilation process.
|
||||
are applied to every subject line that is processed with that pattern. They may
|
||||
not appear in <b>#pattern</b> commands. These modifiers do not affect the
|
||||
compilation process.
|
||||
<pre>
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
|
@ -1574,7 +1623,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 17 October 2015
|
||||
Last updated: 05 November 2015
|
||||
<br>
|
||||
Copyright © 1997-2015 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -210,12 +210,15 @@ in the library.
|
|||
<tr><td><a href="pcre2_set_match_limit.html">pcre2_set_match_limit</a></td>
|
||||
<td> Set the match limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
<tr><td><a href="pcre2_set_max_pattern_length.html">pcre2_set_max_pattern_length</a></td>
|
||||
<td> Set the maximum length of pattern</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_newline.html">pcre2_set_newline</a></td>
|
||||
<td> Set the newline convention</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_offset_limit.html">pcre2_set_offset_limit</a></td>
|
||||
<td> Set the offset limit</td></tr>
|
||||
|
||||
<tr><td><a href="pcre2_set_parens_nest_limit.html">pcre2_set_parens_nest_limit</a></td>
|
||||
<td> Set the parentheses nesting limit</td></tr>
|
||||
|
||||
|
|
|
@ -252,6 +252,9 @@ PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
|
|||
int pcre2_set_character_tables(pcre2_compile_context *ccontext,
|
||||
const unsigned char *tables);
|
||||
|
||||
int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
|
||||
PCRE2_SIZE value);
|
||||
|
||||
int pcre2_set_newline(pcre2_compile_context *ccontext,
|
||||
uint32_t value);
|
||||
|
||||
|
@ -678,6 +681,7 @@ PCRE2 CONTEXTS
|
|||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
An external function for stack checking
|
||||
|
||||
A compile context is also required if you are using custom memory man-
|
||||
|
@ -715,6 +719,16 @@ PCRE2 CONTEXTS
|
|||
only argument is a general context. This function builds a set of char-
|
||||
acter tables in the current locale.
|
||||
|
||||
int pcre2_set_max_pattern_length(pcre2_compile_context *ccontext,
|
||||
PCRE2_SIZE value);
|
||||
|
||||
This sets a maximum length, in code units, for the pattern string that
|
||||
is to be compiled. If the pattern is longer, an error is generated.
|
||||
This facility is provided so that applications that accept patterns
|
||||
from external sources can limit their size. The default is the largest
|
||||
number that a PCRE2_SIZE variable can hold, which is effectively unlim-
|
||||
ited.
|
||||
|
||||
int pcre2_set_newline(pcre2_compile_context *ccontext,
|
||||
uint32_t value);
|
||||
|
||||
|
@ -2581,7 +2595,9 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
This function calls pcre2_match() and then makes a copy of the subject
|
||||
string in outputbuffer, replacing the part that was matched with the
|
||||
replacement string, whose length is supplied in rlength. This can be
|
||||
given as PCRE2_ZERO_TERMINATED for a zero-terminated string.
|
||||
given as PCRE2_ZERO_TERMINATED for a zero-terminated string. Matches in
|
||||
which a \K item in a lookahead in the pattern causes the match to end
|
||||
before it starts are not supported, and give rise to an error return.
|
||||
|
||||
The first seven arguments of pcre2_substitute() are the same as for
|
||||
pcre2_match(), except that the partial matching options are not permit-
|
||||
|
@ -2693,10 +2709,11 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in
|
||||
the replacement string, with more particular errors being
|
||||
PCRE2_ERROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REP-
|
||||
MISSING_BRACE (closing curly bracket not found), and PCRE2_BADSUBSTITU-
|
||||
TION (syntax error in extended group substitution). As for all PCRE2
|
||||
errors, a text message that describes the error can be obtained by
|
||||
calling pcre2_get_error_message().
|
||||
MISSING_BRACE (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION
|
||||
(syntax error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN
|
||||
(the pattern match ended before it started). As for all PCRE2 errors, a
|
||||
text message that describes the error can be obtained by calling
|
||||
pcre2_get_error_message().
|
||||
|
||||
|
||||
DUPLICATE SUBPATTERN NAMES
|
||||
|
@ -2957,7 +2974,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -4405,6 +4422,10 @@ SIZE AND OTHER LIMITATIONS
|
|||
of execution is slower. In the 32-bit library, the internal linkage
|
||||
size is always 4.
|
||||
|
||||
The maximum length of a source pattern string is essentially unlimited;
|
||||
it is the largest number a PCRE2_SIZE variable can hold. However, the
|
||||
program that calls pcre2_compile() can specify a smaller limit.
|
||||
|
||||
The maximum length (in code units) of a subject string is one less than
|
||||
the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an
|
||||
unsigned integer type, usually defined as size_t. Its maximum value
|
||||
|
@ -4419,6 +4440,8 @@ SIZE AND OTHER LIMITATIONS
|
|||
|
||||
All values in repeating quantifiers must be less than 65536.
|
||||
|
||||
The maximum length of a lookbehind assertion is 65535 characters.
|
||||
|
||||
There is no limit to the number of parenthesized subpatterns, but there
|
||||
can be no more than 65535 capturing subpatterns. There is, however, a
|
||||
limit to the depth of nesting of parenthesized subpatterns of all
|
||||
|
@ -4449,8 +4472,8 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 25 November 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
||||
|
@ -7310,7 +7333,8 @@ CONDITIONAL SUBPATTERNS
|
|||
(?(VERSION>=10.4)yes|no)
|
||||
|
||||
This pattern matches "yes" if the PCRE2 version is greater than or equal to
|
||||
10.4, or "no" otherwise.
|
||||
10.4, or "no" otherwise. The fractional part of the version number may
|
||||
not contain more than two digits.
|
||||
|
||||
Assertion conditions
|
||||
|
||||
|
@ -8117,7 +8141,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 16 October 2015
|
||||
Last updated: 01 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -8539,9 +8563,11 @@ ERROR MESSAGES
|
|||
The regerror() function maps a non-zero errorcode from either regcomp()
|
||||
or regexec() to a printable message. If preg is not NULL, the error
|
||||
should have arisen from the use of that structure. A message terminated
|
||||
by a binary zero is placed in errbuf. The length of the message,
|
||||
including the zero, is limited to errbuf_size. The yield of the func-
|
||||
tion is the size of buffer needed to hold the whole message.
|
||||
by a binary zero is placed in errbuf. If the buffer is too short, only
|
||||
the first errbuf_size - 1 characters of the error message are used. The
|
||||
yield of the function is the size of buffer needed to hold the whole
|
||||
message, including the terminating zero. This value is greater than
|
||||
errbuf_size if the message was truncated.
|
||||
|
||||
|
||||
MEMORY USAGE
|
||||
|
@ -8561,7 +8587,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 03 September 2015
|
||||
Last updated: 30 October 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
@ -8673,12 +8699,12 @@ SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS
|
|||
form instead of having to compile them every time the application is
|
||||
run. However, if you are using the just-in-time optimization feature,
|
||||
it is not possible to save and reload the JIT data, because it is posi-
|
||||
tion-dependent. In addition, the host on which the patterns are
|
||||
reloaded must be running the same version of PCRE2, with the same code
|
||||
unit width, and must also have the same endianness, pointer width and
|
||||
PCRE2_SIZE type. For example, patterns compiled on a 32-bit system
|
||||
using PCRE2's 16-bit library cannot be reloaded on a 64-bit system, nor
|
||||
can they be reloaded using the 8-bit library.
|
||||
tion-dependent. The host on which the patterns are reloaded must be
|
||||
running the same version of PCRE2, with the same code unit width, and
|
||||
must also have the same endianness, pointer width and PCRE2_SIZE type.
|
||||
For example, patterns compiled on a 32-bit system using PCRE2's 16-bit
|
||||
library cannot be reloaded on a 64-bit system, nor can they be reloaded
|
||||
using the 8-bit library.
|
||||
|
||||
|
||||
SAVING COMPILED PATTERNS
|
||||
|
@ -8784,10 +8810,16 @@ RE-USING PRECOMPILED PATTERNS
|
|||
compiled on a system with different endianness.
|
||||
|
||||
Decoded patterns can be used for matching in the usual way, and must be
|
||||
freed by calling pcre2_code_free() as normal. A single copy of the
|
||||
character tables is used by all the decoded patterns. A reference count
|
||||
is used to arrange for its memory to be automatically freed when the
|
||||
last pattern is freed.
|
||||
freed by calling pcre2_code_free(). However, be aware that there is a
|
||||
potential race issue if you are using multiple patterns that were
|
||||
decoded from a single byte stream in a multithreaded application. A
|
||||
single copy of the character tables is used by all the decoded patterns
|
||||
and a reference count is used to arrange for its memory to be automati-
|
||||
cally freed when the last pattern is freed, but there is no locking on
|
||||
this reference count. Therefore, if you want to call pcre2_code_free()
|
||||
for these patterns in different threads, you must arrange your own
|
||||
locking, and ensure that pcre2_code_free() cannot be called by two
|
||||
threads at the same time.
|
||||
|
||||
If a pattern was processed by pcre2_jit_compile() before being serial-
|
||||
ized, the JIT data is discarded and so is no longer available after a
|
||||
|
@ -8804,7 +8836,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 20 January 2015
|
||||
Last updated: 03 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
.TH PCRE2_SET_MAX_PATTERN_LENGTH 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
.rs
|
||||
.sp
|
||||
.B #include <pcre2.h>
|
||||
.PP
|
||||
.nf
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.fi
|
||||
.
|
||||
.SH DESCRIPTION
|
||||
.rs
|
||||
.sp
|
||||
This function sets, in a compile context, the maximum length (in code units) of
|
||||
the pattern that can be compiled. The result is always zero.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
\fBpcre2api\fP
|
||||
.\"
|
||||
page and a description of the POSIX API in the
|
||||
.\" HREF
|
||||
\fBpcre2posix\fP
|
||||
.\"
|
||||
page.
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "03 November 2015" "PCRE2 10.21"
|
||||
.TH PCRE2API 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -90,6 +90,9 @@ document for an overview of all the PCRE2 documentation.
|
|||
.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " const unsigned char *\fItables\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.sp
|
||||
.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.sp
|
||||
|
@ -567,6 +570,7 @@ of the following compile-time parameters:
|
|||
PCRE2's character tables
|
||||
The newline character sequence
|
||||
The compile time nested parentheses limit
|
||||
The maximum length of the pattern string
|
||||
An external function for stack checking
|
||||
.sp
|
||||
A compile context is also required if you are using custom memory management.
|
||||
|
@ -610,6 +614,17 @@ argument is a general context. This function builds a set of character tables
|
|||
in the current locale.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||
.fi
|
||||
.sp
|
||||
This sets a maximum length, in code units, for the pattern string that is to be
|
||||
compiled. If the pattern is longer, an error is generated. This facility is
|
||||
provided so that applications that accept patterns from external sources can
|
||||
limit their size. The default is the largest number that a PCRE2_SIZE variable
|
||||
can hold, which is effectively unlimited.
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_set_newline(pcre2_compile_context *\fIccontext\fP,
|
||||
.B " uint32_t \fIvalue\fP);"
|
||||
.fi
|
||||
|
@ -3069,6 +3084,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 November 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2LIMITS 3 "03 November 2015" "PCRE2 10.21"
|
||||
.TH PCRE2LIMITS 3 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH "SIZE AND OTHER LIMITATIONS"
|
||||
|
@ -20,6 +20,10 @@ documentation for details. In these cases the limit is substantially larger.
|
|||
However, the speed of execution is slower. In the 32-bit library, the internal
|
||||
linkage size is always 4.
|
||||
.P
|
||||
The maximum length of a source pattern string is essentially unlimited; it is
|
||||
the largest number a PCRE2_SIZE variable can hold. However, the program that
|
||||
calls \fBpcre2_compile()\fP can specify a smaller limit.
|
||||
.P
|
||||
The maximum length (in code units) of a subject string is one less than the
|
||||
largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned
|
||||
integer type, usually defined as size_t. Its maximum value (that is
|
||||
|
@ -71,6 +75,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 03 November 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "30 October 2015" "PCRE 10.21"
|
||||
.TH PCRE2TEST 1 "05 November 2015" "PCRE2 10.21"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -528,6 +528,7 @@ about the pattern:
|
|||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
max_pattern_length=<n> set the maximum pattern length
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
null_context compile with a NULL context
|
||||
|
@ -767,6 +768,15 @@ sets its own default of 220, which is required for running the standard test
|
|||
suite.
|
||||
.
|
||||
.
|
||||
.SS "Limiting the pattern length"
|
||||
.rs
|
||||
.sp
|
||||
The \fBmax_pattern_length\fP modifier sets a limit, in code units, to the
|
||||
length of pattern that \fBpcre2_compile()\fP will accept. Breaching the limit
|
||||
causes a compilation error. The default is the largest number a PCRE2_SIZE
|
||||
variable can hold (essentially unlimited).
|
||||
.
|
||||
.
|
||||
.SS "Using the POSIX wrapper API"
|
||||
.rs
|
||||
.sp
|
||||
|
@ -1596,6 +1606,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 30 October 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -209,8 +209,9 @@ DESCRIPTION
|
|||
or \r\n, etc., depending on the newline setting) in a single line of
|
||||
input to encode the newline sequences. There is no limit on the length
|
||||
of subject lines; the input buffer is automatically extended if it is
|
||||
too small. There is a replication feature that makes it possible to
|
||||
generate long subject lines without having to supply them explicitly.
|
||||
too small. There are replication features that make it possible to
|
||||
generate long repetitive pattern or subject lines without having to
|
||||
supply them explicitly.
|
||||
|
||||
An empty line or the end of the file signals the end of the subject
|
||||
lines for a test, at which point a new pattern or command line is
|
||||
|
@ -450,10 +451,10 @@ SUBJECT LINE SYNTAX
|
|||
|
||||
PATTERN MODIFIERS
|
||||
|
||||
There are three types of modifier that can appear in pattern lines, two
|
||||
of which may also be used in a #pattern command. A pattern's modifier
|
||||
list can add to or override default modifiers that were set by a previ-
|
||||
ous #pattern command.
|
||||
There are several types of modifier that can appear in pattern lines.
|
||||
Except where noted below, they may also be used in #pattern commands. A
|
||||
pattern's modifier list can add to or override default modifiers that
|
||||
were set by a previous #pattern command.
|
||||
|
||||
Setting compilation options
|
||||
|
||||
|
@ -509,6 +510,7 @@ PATTERN MODIFIERS
|
|||
jitfast use JIT fast path
|
||||
jitverify verify JIT use
|
||||
locale=<name> use this locale
|
||||
max_pattern_length=<n> set the maximum pattern length
|
||||
memory show memory used
|
||||
newline=<type> set newline type
|
||||
null_context compile with a NULL context
|
||||
|
@ -603,6 +605,30 @@ PATTERN MODIFIERS
|
|||
the length as PCRE2_ZERO_TERMINATED. However, for patterns specified in
|
||||
hexadecimal, the actual length of the pattern is passed.
|
||||
|
||||
Generating long repetitive patterns
|
||||
|
||||
Some tests use long patterns that are very repetitive. Instead of cre-
|
||||
ating a very long input line for such a pattern, you can use a special
|
||||
repetition feature, similar to the one described for subject lines
|
||||
above. If the expand modifier is present on a pattern, parts of the
|
||||
pattern that have the form
|
||||
|
||||
\[<characters>]{<count>}
|
||||
|
||||
are expanded before the pattern is passed to pcre2_compile(). For exam-
|
||||
ple, \[AB]{6000} is expanded to "ABAB..." 6000 times. This construction
|
||||
cannot be nested. An initial "\[" sequence is recognized only if "]{"
|
||||
followed by decimal digits and "}" is found later in the pattern. If
|
||||
not, the characters remain in the pattern unaltered.
|
||||
|
||||
If part of an expanded pattern looks like an expansion, but is really
|
||||
part of the actual pattern, unwanted expansion can be avoided by giving
|
||||
two values in the quantifier. For example, \[AB]{6000,6000} is not rec-
|
||||
ognized as an expansion item.
|
||||
|
||||
If the info modifier is set on an expanded pattern, the result of the
|
||||
expansion is included in the information that is output.
|
||||
|
||||
JIT compilation
|
||||
|
||||
Just-in-time (JIT) compiling is a heavyweight optimization that can
|
||||
|
@ -697,6 +723,13 @@ PATTERN MODIFIERS
|
|||
pcre2test sets its own default of 220, which is required for running
|
||||
the standard test suite.
|
||||
|
||||
Limiting the pattern length
|
||||
|
||||
The max_pattern_length modifier sets a limit, in code units, to the
|
||||
length of pattern that pcre2_compile() will accept. Breaching the limit
|
||||
causes a compilation error. The default is the largest number a
|
||||
PCRE2_SIZE variable can hold (essentially unlimited).
|
||||
|
||||
Using the POSIX wrapper API
|
||||
|
||||
The /posix modifier causes pcre2test to call PCRE2 via the POSIX wrap-
|
||||
|
@ -714,6 +747,16 @@ PATTERN MODIFIERS
|
|||
ucp REG_UCP ) the POSIX standard
|
||||
utf REG_UTF8 )
|
||||
|
||||
The regerror_buffsize modifier specifies a size for the error buffer
|
||||
that is passed to regerror() in the event of a compilation error. For
|
||||
example:
|
||||
|
||||
/abc/posix,regerror_buffsize=20
|
||||
|
||||
This provides a means of testing the behaviour of regerror() when the
|
||||
buffer is too small for the error message. If this modifier has not
|
||||
been set, a large buffer is used.
|
||||
|
||||
The aftertext and allaftertext subject modifiers work as described
|
||||
below. All other modifiers cause an error.
|
||||
|
||||
|
@ -751,7 +794,8 @@ PATTERN MODIFIERS
|
|||
The following modifiers are really subject modifiers, and are described
|
||||
below. However, they may be included in a pattern's modifier list, in
|
||||
which case they are applied to every subject line that is processed
|
||||
with that pattern. They do not affect the compilation process.
|
||||
with that pattern. They may not appear in #pattern commands. These mod-
|
||||
ifiers do not affect the compilation process.
|
||||
|
||||
aftertext show text after match
|
||||
allaftertext show text after captures
|
||||
|
@ -1427,5 +1471,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 17 October 2015
|
||||
Last updated: 05 November 2015
|
||||
Copyright (c) 1997-2015 University of Cambridge.
|
||||
|
|
|
@ -396,6 +396,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -616,6 +618,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
|
|
|
@ -396,6 +396,8 @@ PCRE2_EXP_DECL void pcre2_compile_context_free(pcre2_compile_context *); \
|
|||
PCRE2_EXP_DECL int pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_character_tables(pcre2_compile_context *, \
|
||||
const unsigned char *); \
|
||||
PCRE2_EXP_DECL int pcre2_set_max_pattern_length(pcre2_compile_context *, \
|
||||
PCRE2_SIZE); \
|
||||
PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||
uint32_t); \
|
||||
|
@ -616,6 +618,7 @@ pcre2_compile are called by application code. */
|
|||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||
#define pcre2_set_match_limit PCRE2_SUFFIX(pcre2_set_match_limit_)
|
||||
#define pcre2_set_max_pattern_length PCRE2_SUFFIX(pcre2_set_max_pattern_length_)
|
||||
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
|
||||
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
|
||||
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
|
||||
|
|
|
@ -583,7 +583,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
|||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87 };
|
||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88 };
|
||||
|
||||
/* Error codes that correspond to negative error codes returned by
|
||||
find_fixedlength(). */
|
||||
|
@ -8128,10 +8128,24 @@ if (ccontext == NULL)
|
|||
/* A zero-terminated pattern is indicated by the special length value
|
||||
PCRE2_ZERO_TERMINATED. Otherwise, we make a copy of the pattern and add a zero,
|
||||
to ensure that it is always possible to look one code unit beyond the end of
|
||||
the pattern's characters. */
|
||||
the pattern's characters. In both cases, check if the pattern is overlong. */
|
||||
|
||||
if (patlen == PCRE2_ZERO_TERMINATED) patlen = PRIV(strlen)(pattern); else
|
||||
if (patlen == PCRE2_ZERO_TERMINATED)
|
||||
{
|
||||
patlen = PRIV(strlen)(pattern);
|
||||
if (patlen > ccontext->max_pattern_length)
|
||||
{
|
||||
*errorptr = ERR88;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (patlen > ccontext->max_pattern_length)
|
||||
{
|
||||
*errorptr = ERR88;
|
||||
return NULL;
|
||||
}
|
||||
if (patlen < COPIED_PATTERN_SIZE)
|
||||
copied_pattern = stack_copied_pattern;
|
||||
else
|
||||
|
|
|
@ -131,13 +131,14 @@ return gcontext;
|
|||
when no context is supplied to the compile function. */
|
||||
|
||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||
{ default_malloc, default_free, NULL },
|
||||
NULL,
|
||||
NULL,
|
||||
PRIV(default_tables),
|
||||
BSR_DEFAULT,
|
||||
NEWLINE_DEFAULT,
|
||||
PARENS_NEST_LIMIT };
|
||||
{ default_malloc, default_free, NULL }, /* Default memory handling */
|
||||
NULL, /* Stack guard */
|
||||
NULL, /* Stack guard data */
|
||||
PRIV(default_tables), /* Character tables */
|
||||
PCRE2_UNSET, /* Max pattern length */
|
||||
BSR_DEFAULT, /* Backslash R default */
|
||||
NEWLINE_DEFAULT, /* Newline convention */
|
||||
PARENS_NEST_LIMIT }; /* As it says */
|
||||
|
||||
/* The create function copies the default into the new memory, but must
|
||||
override the default memory handling functions if a gcontext was provided. */
|
||||
|
@ -295,6 +296,13 @@ switch(value)
|
|||
}
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_max_pattern_length(pcre2_compile_context *ccontext, PCRE2_SIZE length)
|
||||
{
|
||||
ccontext->max_pattern_length = length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_set_newline(pcre2_compile_context *ccontext, uint32_t newline)
|
||||
{
|
||||
|
|
|
@ -172,6 +172,7 @@ static const char compile_error_texts[] =
|
|||
"using \\C is disabled in this PCRE2 library\0"
|
||||
"regular expression is too complicated\0"
|
||||
"lookbehind assertion is too long\0"
|
||||
"pattern string is longer than the limit set by the application\0"
|
||||
;
|
||||
|
||||
/* Match-time and UTF error texts are in the same format. */
|
||||
|
|
|
@ -562,6 +562,7 @@ typedef struct pcre2_real_compile_context {
|
|||
int (*stack_guard)(uint32_t, void *);
|
||||
void *stack_guard_data;
|
||||
const uint8_t *tables;
|
||||
PCRE2_SIZE max_pattern_length;
|
||||
uint16_t bsr_convention;
|
||||
uint16_t newline_convention;
|
||||
uint32_t parens_nest_limit;
|
||||
|
|
|
@ -540,6 +540,7 @@ static modstruct modlist[] = {
|
|||
{ "mark", MOD_PNDP, MOD_CTL, CTL_MARK, PO(control) },
|
||||
{ "match_limit", MOD_CTM, MOD_INT, 0, MO(match_limit) },
|
||||
{ "match_unset_backref", MOD_PAT, MOD_OPT, PCRE2_MATCH_UNSET_BACKREF, PO(options) },
|
||||
{ "max_pattern_length", MOD_CTC, MOD_SIZ, 0, CO(max_pattern_length) },
|
||||
{ "memory", MOD_PD, MOD_CTL, CTL_MEMORY, PD(control) },
|
||||
{ "multiline", MOD_PATP, MOD_OPT, PCRE2_MULTILINE, PO(options) },
|
||||
{ "never_backslash_c", MOD_PAT, MOD_OPT, PCRE2_NEVER_BACKSLASH_C, PO(options) },
|
||||
|
@ -1094,6 +1095,14 @@ are supported. */
|
|||
else \
|
||||
pcre2_set_match_limit_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_max_pattern_length_8(G(a,8),b); \
|
||||
else if (test_mode == PCRE16_MODE) \
|
||||
pcre2_set_max_pattern_length_16(G(a,16),b); \
|
||||
else \
|
||||
pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
|
||||
if (test_mode == PCRE8_MODE) \
|
||||
pcre2_set_offset_limit_8(G(a,8),b); \
|
||||
|
@ -1502,6 +1511,12 @@ the three different cases. */
|
|||
else \
|
||||
G(pcre2_set_match_limit_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_max_pattern_length_,BITONE)(G(a,BITONE),b); \
|
||||
else \
|
||||
G(pcre2_set_max_pattern_length_,BITTWO)(G(a,BITTWO),b)
|
||||
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) \
|
||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||
G(pcre2_set_offset_limit_,BITONE)(G(a,BITONE),b); \
|
||||
|
@ -1706,6 +1721,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_8(G(a,8),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
|
||||
|
@ -1798,6 +1814,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_16(G(a,16),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
|
||||
|
@ -1890,6 +1907,7 @@ the three different cases. */
|
|||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_MAX_PATTERN_LENGTH(a,b) pcre2_set_max_pattern_length_32(G(a,32),b)
|
||||
#define PCRE2_SET_OFFSET_LIMIT(a,b) pcre2_set_offset_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
|
||||
|
|
|
@ -4603,4 +4603,14 @@ B)x/alt_verbnames,mark
|
|||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
|
||||
|
||||
# These two use zero-termination
|
||||
/abcd/max_pattern_length=3
|
||||
|
||||
/abc/max_pattern_length=3
|
||||
|
||||
# These two, being hex, pass the length
|
||||
/abcdefab/hex,max_pattern_length=3
|
||||
|
||||
/abcdef/hex,max_pattern_length=3
|
||||
|
||||
# End of testinput2
|
||||
|
|
|
@ -14699,4 +14699,16 @@ Failed: error 148 at offset 36: subpattern name is too long (maximum 32 characte
|
|||
|
||||
/(?'abcdefghijklmnopqrstuvwxyzABCDEF'justright)/
|
||||
|
||||
# These two use zero-termination
|
||||
/abcd/max_pattern_length=3
|
||||
Failed: error 188 at offset 0: pattern string is longer than the limit set by the application
|
||||
|
||||
/abc/max_pattern_length=3
|
||||
|
||||
# These two, being hex, pass the length
|
||||
/abcdefab/hex,max_pattern_length=3
|
||||
Failed: error 188 at offset 0: pattern string is longer than the limit set by the application
|
||||
|
||||
/abcdef/hex,max_pattern_length=3
|
||||
|
||||
# End of testinput2
|
||||
|
|
Loading…
Reference in New Issue