Add user data to recursion guard; get ready for RC1 (again)
This commit is contained in:
parent
d1f5dd5bf2
commit
9fcdf2cc6f
|
@ -1,7 +1,7 @@
|
||||||
Change Log for PCRE2
|
Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
Version 10.00 24-November-2014
|
Version 10.00 28-November-2014
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||||
|
|
2
NEWS
2
NEWS
|
@ -1,7 +1,7 @@
|
||||||
News about PCRE2 releases
|
News about PCRE2 releases
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Version 10.00 24-November-2014
|
Version 10.00 28-November-2014
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||||
|
|
|
@ -11,7 +11,7 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [00])
|
m4_define(pcre2_minor, [00])
|
||||||
m4_define(pcre2_prerelease, [-RC1])
|
m4_define(pcre2_prerelease, [-RC1])
|
||||||
m4_define(pcre2_date, [2014-11-24])
|
m4_define(pcre2_date, [2014-11-28])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
|
@ -39,14 +39,12 @@ code units; for other types of data it is in bytes.
|
||||||
<P>
|
<P>
|
||||||
If <b>where</b> is not NULL, for PCRE2_CONFIG_JITTARGET,
|
If <b>where</b> is not NULL, for PCRE2_CONFIG_JITTARGET,
|
||||||
PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a
|
PCRE2_CONFIG_UNICODE_VERSION, and PCRE2_CONFIG_VERSION it must point to a
|
||||||
buffer that is large enough to hold the string. For PCRE2_CONFIG_MATCHLIMIT,
|
buffer that is large enough to hold the string. For all other codes it must
|
||||||
PCRE2_CONFIG_PARENSLIMIT, and PCRE2_CONFIG_RECURSIONLIMIT it must point to an
|
point to a uint32_t integer variable. The available codes are:
|
||||||
unsigned long int variable, and for all other codes to an int variable. The
|
|
||||||
available codes are:
|
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||||
0 all Unicode line endings
|
PCRE2_BSR_UNICODE
|
||||||
1 CR, LF, or CRLF only
|
PCRE2_BSR_ANYCRLF
|
||||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
PCRE2_CONFIG_JIT Availability of just-in-time compiler
|
||||||
support (1=yes 0=no)
|
support (1=yes 0=no)
|
||||||
PCRE2_CONFIG_JITTARGET Information about the target archi-
|
PCRE2_CONFIG_JITTARGET Information about the target archi-
|
||||||
|
@ -54,11 +52,11 @@ available codes are:
|
||||||
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
|
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
|
||||||
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
|
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
|
||||||
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
|
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
|
||||||
1 for CR
|
PCRE2_NEWLINE_CR
|
||||||
2 for LF
|
PCRE2_NEWLINE_LF
|
||||||
3 for CRLF
|
PCRE2_NEWLINE_CRLF
|
||||||
4 for ANY
|
PCRE2_NEWLINE_ANY
|
||||||
5 for ANYCRLF
|
PCRE2_NEWLINE_ANYCRLF
|
||||||
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
|
PCRE2_CONFIG_PARENSLIMIT Default parentheses nesting limit
|
||||||
PCRE2_CONFIG_RECURSIONLIMIT Internal recursion depth limit
|
PCRE2_CONFIG_RECURSIONLIMIT Internal recursion depth limit
|
||||||
PCRE2_CONFIG_STACKRECURSE Recursion implementation (1=stack
|
PCRE2_CONFIG_STACKRECURSE Recursion implementation (1=stack
|
||||||
|
|
|
@ -78,7 +78,7 @@ the requested information, in bytes. The following information is available:
|
||||||
The <i>where</i> argument must point to an unsigned 32-bit integer (uint32_t
|
The <i>where</i> argument must point to an unsigned 32-bit integer (uint32_t
|
||||||
variable), except for the following <i>what</i> values:
|
variable), except for the following <i>what</i> values:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_FIRSTBITMAP const uint8_t
|
PCRE2_INFO_FIRSTBITMAP const uint8_t *
|
||||||
PCRE2_INFO_JITSIZE size_t
|
PCRE2_INFO_JITSIZE size_t
|
||||||
PCRE2_INFO_NAMETABLE PCRE2_SPTR
|
PCRE2_INFO_NAMETABLE PCRE2_SPTR
|
||||||
PCRE2_INFO_SIZE size_t
|
PCRE2_INFO_SIZE size_t
|
||||||
|
|
|
@ -20,7 +20,7 @@ SYNOPSIS
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t));</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -28,11 +28,12 @@ DESCRIPTION
|
||||||
<P>
|
<P>
|
||||||
This function defines, within a compile context, a function that is called
|
This function defines, within a compile context, a function that is called
|
||||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
||||||
pattern. The argument to the function gives the current depth of parenthesis
|
pattern. The first argument to the function gives the current depth of
|
||||||
nesting. The function should return zero if all is well, or non-zero to force
|
parenthesis nesting, and the second is user data that is supplied when the
|
||||||
an error. This feature is provided so that applications can check the available
|
function is set up. The guard function should return zero if all is well, or
|
||||||
system stack space, in order to avoid running out. The result of this function
|
non-zero to force an error. This feature is provided so that applications can
|
||||||
is always zero.
|
check the available system stack space, in order to avoid running out. The
|
||||||
|
result of <b>pcre2_set_compile_recursion_guard()</b> is always zero.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -24,31 +24,32 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
|
<li><a name="TOC9" href="#SEC9">PCRE2 NATIVE API AUXILIARY FUNCTIONS</a>
|
||||||
<li><a name="TOC10" href="#SEC10">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
<li><a name="TOC10" href="#SEC10">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a>
|
||||||
<li><a name="TOC11" href="#SEC11">PCRE2 API OVERVIEW</a>
|
<li><a name="TOC11" href="#SEC11">PCRE2 API OVERVIEW</a>
|
||||||
<li><a name="TOC12" href="#SEC12">NEWLINES</a>
|
<li><a name="TOC12" href="#SEC12">STRING LENGTHS AND OFFSETS</a>
|
||||||
<li><a name="TOC13" href="#SEC13">MULTITHREADING</a>
|
<li><a name="TOC13" href="#SEC13">NEWLINES</a>
|
||||||
<li><a name="TOC14" href="#SEC14">PCRE2 CONTEXTS</a>
|
<li><a name="TOC14" href="#SEC14">MULTITHREADING</a>
|
||||||
<li><a name="TOC15" href="#SEC15">CHECKING BUILD-TIME OPTIONS</a>
|
<li><a name="TOC15" href="#SEC15">PCRE2 CONTEXTS</a>
|
||||||
<li><a name="TOC16" href="#SEC16">COMPILING A PATTERN</a>
|
<li><a name="TOC16" href="#SEC16">CHECKING BUILD-TIME OPTIONS</a>
|
||||||
<li><a name="TOC17" href="#SEC17">COMPILATION ERROR CODES</a>
|
<li><a name="TOC17" href="#SEC17">COMPILING A PATTERN</a>
|
||||||
<li><a name="TOC18" href="#SEC18">JUST-IN-TIME (JIT) COMPILATION</a>
|
<li><a name="TOC18" href="#SEC18">COMPILATION ERROR CODES</a>
|
||||||
<li><a name="TOC19" href="#SEC19">LOCALE SUPPORT</a>
|
<li><a name="TOC19" href="#SEC19">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||||
<li><a name="TOC20" href="#SEC20">INFORMATION ABOUT A COMPILED PATTERN</a>
|
<li><a name="TOC20" href="#SEC20">LOCALE SUPPORT</a>
|
||||||
<li><a name="TOC21" href="#SEC21">THE MATCH DATA BLOCK</a>
|
<li><a name="TOC21" href="#SEC21">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||||
<li><a name="TOC22" href="#SEC22">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
<li><a name="TOC22" href="#SEC22">THE MATCH DATA BLOCK</a>
|
||||||
<li><a name="TOC23" href="#SEC23">NEWLINE HANDLING WHEN MATCHING</a>
|
<li><a name="TOC23" href="#SEC23">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||||
<li><a name="TOC24" href="#SEC24">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC24" href="#SEC24">NEWLINE HANDLING WHEN MATCHING</a>
|
||||||
<li><a name="TOC25" href="#SEC25">OTHER INFORMATION ABOUT A MATCH</a>
|
<li><a name="TOC25" href="#SEC25">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC26" href="#SEC26">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
<li><a name="TOC26" href="#SEC26">OTHER INFORMATION ABOUT A MATCH</a>
|
||||||
<li><a name="TOC27" href="#SEC27">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
<li><a name="TOC27" href="#SEC27">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||||
<li><a name="TOC28" href="#SEC28">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
<li><a name="TOC28" href="#SEC28">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||||
<li><a name="TOC29" href="#SEC29">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
<li><a name="TOC29" href="#SEC29">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||||
<li><a name="TOC30" href="#SEC30">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
<li><a name="TOC30" href="#SEC30">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||||
<li><a name="TOC31" href="#SEC31">DUPLICATE SUBPATTERN NAMES</a>
|
<li><a name="TOC31" href="#SEC31">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||||
<li><a name="TOC32" href="#SEC32">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
<li><a name="TOC32" href="#SEC32">DUPLICATE SUBPATTERN NAMES</a>
|
||||||
<li><a name="TOC33" href="#SEC33">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
<li><a name="TOC33" href="#SEC33">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||||
<li><a name="TOC34" href="#SEC34">SEE ALSO</a>
|
<li><a name="TOC34" href="#SEC34">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||||
<li><a name="TOC35" href="#SEC35">AUTHOR</a>
|
<li><a name="TOC35" href="#SEC35">SEE ALSO</a>
|
||||||
<li><a name="TOC36" href="#SEC36">REVISION</a>
|
<li><a name="TOC36" href="#SEC36">AUTHOR</a>
|
||||||
|
<li><a name="TOC37" href="#SEC37">REVISION</a>
|
||||||
</ul>
|
</ul>
|
||||||
<P>
|
<P>
|
||||||
<b>#include &lt;pcre2.h&gt;</b>
|
<b>#include &lt;pcre2.h&gt;</b>
|
||||||
|
@ -148,7 +149,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t));</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC5" href="#TOC1">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a><br>
|
<br><a name="SEC5" href="#TOC1">PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -164,7 +165,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
<b> int (*<i>callout_function</i>)(pcre2_callout_block *, void *),</b>
|
||||||
<b> void *<i>callout_data</i>);</b>
|
<b> void *<i>callout_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -424,8 +425,18 @@ matched.
|
||||||
Finally, there are functions for finding out information about a compiled
|
Finally, there are functions for finding out information about a compiled
|
||||||
pattern (<b>pcre2_pattern_info()</b>) and about the configuration with which
|
pattern (<b>pcre2_pattern_info()</b>) and about the configuration with which
|
||||||
PCRE2 was built (<b>pcre2_config()</b>).
|
PCRE2 was built (<b>pcre2_config()</b>).
|
||||||
|
</P>
|
||||||
|
<br><a name="SEC12" href="#TOC1">STRING LENGTHS AND OFFSETS</a><br>
|
||||||
|
<P>
|
||||||
|
The PCRE2 API uses string lengths and offsets into strings of code units in
|
||||||
|
several places. These values are always of type PCRE2_SIZE, which is an
|
||||||
|
unsigned integer type, currently always defined as <i>size_t</i>. The largest
|
||||||
|
value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved
|
||||||
|
as a special indicator for zero-terminated strings and unset offsets.
|
||||||
|
Therefore, the longest string that can be handled is one less than this
|
||||||
|
maximum.
|
||||||
<a name="newlines"></a></P>
|
<a name="newlines"></a></P>
|
||||||
<br><a name="SEC12" href="#TOC1">NEWLINES</a><br>
|
<br><a name="SEC13" href="#TOC1">NEWLINES</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 supports five different conventions for indicating line breaks in
|
PCRE2 supports five different conventions for indicating line breaks in
|
||||||
strings: a single CR (carriage return) character, a single LF (linefeed)
|
strings: a single CR (carriage return) character, a single LF (linefeed)
|
||||||
|
@ -460,7 +471,7 @@ The choice of newline convention does not affect the interpretation of
|
||||||
the \n or \r escape sequences, nor does it affect what \R matches; this has
|
the \n or \r escape sequences, nor does it affect what \R matches; this has
|
||||||
its own separate convention.
|
its own separate convention.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">MULTITHREADING</a><br>
|
<br><a name="SEC14" href="#TOC1">MULTITHREADING</a><br>
|
||||||
<P>
|
<P>
|
||||||
In a multithreaded application it is important to keep thread-specific data
|
In a multithreaded application it is important to keep thread-specific data
|
||||||
separate from data that can be shared between threads. The PCRE2 library code
|
separate from data that can be shared between threads. The PCRE2 library code
|
||||||
|
@ -505,7 +516,7 @@ storing the results of a match. This includes details of what was matched, as
|
||||||
well as additional information such as the name of a (*MARK) setting. Each
|
well as additional information such as the name of a (*MARK) setting. Each
|
||||||
thread must provide its own version of this memory.
|
thread must provide its own version of this memory.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC14" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
<br><a name="SEC15" href="#TOC1">PCRE2 CONTEXTS</a><br>
|
||||||
<P>
|
<P>
|
||||||
Some PCRE2 functions have a lot of parameters, many of which are used only by
|
Some PCRE2 functions have a lot of parameters, many of which are used only by
|
||||||
specialist applications, for example, those that use custom memory management
|
specialist applications, for example, those that use custom memory management
|
||||||
|
@ -636,7 +647,7 @@ This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||||
using up too much system stack when being compiled.
|
using up too much system stack when being compiled.
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t));</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
There is at least one application that runs PCRE2 in threads with very limited
|
There is at least one application that runs PCRE2 in threads with very limited
|
||||||
|
@ -644,8 +655,14 @@ system stack, where running out of stack is to be avoided at all costs. The
|
||||||
parenthesis limit above cannot take account of how much stack is actually
|
parenthesis limit above cannot take account of how much stack is actually
|
||||||
available. For finer control, you can supply a function that is called
|
available. For finer control, you can supply a function that is called
|
||||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
||||||
pattern. The argument to the function gives the current depth of nesting. The
|
pattern. This function can check the actual stack size (or anything else that
|
||||||
function should return zero if all is well, or non-zero to force an error.
|
it wants to, of course).
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The first argument to the guard function gives the current depth of
|
||||||
|
nesting, and the second is user data that is set up by the last argument of
|
||||||
|
<b>pcre2_set_compile_recursion_guard()</b>. The guard function should return
|
||||||
|
zero if all is well, or non-zero to force an error.
|
||||||
<a name="matchcontext"></a></P>
|
<a name="matchcontext"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
The match context
|
The match context
|
||||||
|
@ -679,7 +696,7 @@ A match context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *),</b>
|
<b> int (*<i>callout_function</i>)(pcre2_callout_block *, void *),</b>
|
||||||
<b> void *<i>callout_data</i>);</b>
|
<b> void *<i>callout_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
@ -780,7 +797,7 @@ exit so that they can be re-used when possible during the match. In the absence
|
||||||
of these functions, the normal custom memory management functions are used, if
|
of these functions, the normal custom memory management functions are used, if
|
||||||
supplied, otherwise the system functions.
|
supplied, otherwise the system functions.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC15" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
<br><a name="SEC16" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_config(uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -807,15 +824,15 @@ available:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_BSR
|
PCRE2_CONFIG_BSR
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer whose value indicates what character sequences the \R
|
The output is a uint32_t integer whose value indicates what character
|
||||||
escape sequence matches by default. A value of PCRE2_BSR_UNICODE means that \R
|
sequences the \R escape sequence matches by default. A value of
|
||||||
matches any Unicode line ending sequence; a value of PCRE2_BSR_ANYCRLF means
|
PCRE2_BSR_UNICODE means that \R matches any Unicode line ending sequence; a
|
||||||
that \R matches only CR, LF, or CRLF. The default can be overridden when a
|
value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The
|
||||||
pattern is compiled.
|
default can be overridden when a pattern is compiled.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_JIT
|
PCRE2_CONFIG_JIT
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer that is set to one if support for just-in-time
|
The output is a uint32_t integer that is set to one if support for just-in-time
|
||||||
compiling is available; otherwise it is set to zero.
|
compiling is available; otherwise it is set to zero.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_JITTARGET
|
PCRE2_CONFIG_JITTARGET
|
||||||
|
@ -831,12 +848,13 @@ for the terminating zero.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_LINKSIZE
|
PCRE2_CONFIG_LINKSIZE
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer that contains the number of bytes used for internal
|
The output is a uint32_t integer that contains the number of bytes used for
|
||||||
linkage in compiled regular expressions. When PCRE2 is configured, the value
|
internal linkage in compiled regular expressions. When PCRE2 is configured, the
|
||||||
can be set to 2, 3, or 4, with the default being 2. This is the value that is
|
value can be set to 2, 3, or 4, with the default being 2. This is the value
|
||||||
returned by <b>pcre2_config()</b>. However, when the 16-bit library is compiled,
|
that is returned by <b>pcre2_config()</b>. However, when the 16-bit library is
|
||||||
a value of 3 is rounded up to 4, and when the 32-bit library is compiled,
|
compiled, a value of 3 is rounded up to 4, and when the 32-bit library is
|
||||||
internal linkages always use 4 bytes, so the configured value is not relevant.
|
compiled, internal linkages always use 4 bytes, so the configured value is not
|
||||||
|
relevant.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
|
The default value of 2 for the 8-bit and 16-bit libraries is sufficient for all
|
||||||
|
@ -846,14 +864,14 @@ be compiled by those two libraries, but at the expense of slower matching.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_MATCHLIMIT
|
PCRE2_CONFIG_MATCHLIMIT
|
||||||
</pre>
|
</pre>
|
||||||
The output is an unsigned long integer that gives the default limit for the
|
The output is a uint32_t integer that gives the default limit for the number of
|
||||||
number of internal matching function calls in a <b>pcre2_match()</b> execution.
|
internal matching function calls in a <b>pcre2_match()</b> execution. Further
|
||||||
Further details are given with <b>pcre2_match()</b> below.
|
details are given with <b>pcre2_match()</b> below.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_NEWLINE
|
PCRE2_CONFIG_NEWLINE
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer whose value specifies the default character sequence
|
The output is a uint32_t integer whose value specifies the default character
|
||||||
that is recognized as meaning "newline". The values are:
|
sequence that is recognized as meaning "newline". The values are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_NEWLINE_CR Carriage return (CR)
|
PCRE2_NEWLINE_CR Carriage return (CR)
|
||||||
PCRE2_NEWLINE_LF Linefeed (LF)
|
PCRE2_NEWLINE_LF Linefeed (LF)
|
||||||
|
@ -866,7 +884,7 @@ operating system.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_PARENSLIMIT
|
PCRE2_CONFIG_PARENSLIMIT
|
||||||
</pre>
|
</pre>
|
||||||
The output is an unsigned long integer that gives the maximum depth of nesting
|
The output is a uint32_t integer that gives the maximum depth of nesting
|
||||||
of parentheses (of any kind) in a pattern. This limit is imposed to cap the
|
of parentheses (of any kind) in a pattern. This limit is imposed to cap the
|
||||||
amount of system stack used when a pattern is compiled. It is specified when
|
amount of system stack used when a pattern is compiled. It is specified when
|
||||||
PCRE2 is built; the default is 250. This limit does not take into account the
|
PCRE2 is built; the default is 250. This limit does not take into account the
|
||||||
|
@ -875,16 +893,15 @@ over compilation stack usage, see <b>pcre2_set_compile_recursion_guard()</b>.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_RECURSIONLIMIT
|
PCRE2_CONFIG_RECURSIONLIMIT
|
||||||
</pre>
|
</pre>
|
||||||
The output is an unsigned long integer that gives the default limit for the
|
The output is a uint32_t integer that gives the default limit for the depth of
|
||||||
depth of recursion when calling the internal matching function in a
|
recursion when calling the internal matching function in a <b>pcre2_match()</b>
|
||||||
<b>pcre2_match()</b> execution. Further details are given with
|
execution. Further details are given with <b>pcre2_match()</b> below.
|
||||||
<b>pcre2_match()</b> below.
|
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_STACKRECURSE
|
PCRE2_CONFIG_STACKRECURSE
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer that is set to one if internal recursion when running
|
The output is a uint32_t integer that is set to one if internal recursion when
|
||||||
<b>pcre2_match()</b> is implemented by recursive function calls that use the
|
running <b>pcre2_match()</b> is implemented by recursive function calls that use
|
||||||
system stack to remember their state. This is the usual way that PCRE2 is
|
the system stack to remember their state. This is the usual way that PCRE2 is
|
||||||
compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
|
compiled. The output is zero if PCRE2 was compiled to use blocks of data on the
|
||||||
heap instead of recursive function calls.
|
heap instead of recursive function calls.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -900,8 +917,8 @@ string plus one unit for the terminating zero.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_UNICODE
|
PCRE2_CONFIG_UNICODE
|
||||||
</pre>
|
</pre>
|
||||||
The output is an integer that is set to one if Unicode support is available;
|
The output is a uint32_t integer that is set to one if Unicode support is
|
||||||
otherwise it is set to zero. Unicode support implies UTF support.
|
available; otherwise it is set to zero. Unicode support implies UTF support.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_CONFIG_VERSION
|
PCRE2_CONFIG_VERSION
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -912,7 +929,7 @@ the PCRE2 version string, zero-terminated. The number of code units used is
|
||||||
returned. This is the length of the string plus one unit for the terminating
|
returned. This is the length of the string plus one unit for the terminating
|
||||||
zero.
|
zero.
|
||||||
<a name="compiling"></a></P>
|
<a name="compiling"></a></P>
|
||||||
<br><a name="SEC16" href="#TOC1">COMPILING A PATTERN</a><br>
|
<br><a name="SEC17" href="#TOC1">COMPILING A PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
<b>pcre2_code *pcre2_compile(PCRE2_SPTR <i>pattern</i>, PCRE2_SIZE <i>length</i>,</b>
|
||||||
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
|
<b> uint32_t <i>options</i>, int *<i>errorcode</i>, PCRE2_SIZE *<i>erroroffset,</i></b>
|
||||||
|
@ -1267,7 +1284,7 @@ the behaviour of PCRE2 are given in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC17" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
<br><a name="SEC18" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||||
<P>
|
<P>
|
||||||
There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
|
There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
|
||||||
it finds an error in the pattern. There are also some negative error codes that
|
it finds an error in the pattern. There are also some negative error codes that
|
||||||
|
@ -1277,7 +1294,7 @@ are used for invalid UTF strings. These are the same as given by
|
||||||
page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
|
page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
|
||||||
textual error message from any error code.
|
textual error message from any error code.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC18" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
<br><a name="SEC19" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1315,7 +1332,7 @@ patterns to be analyzed, and for one-off matches and simple patterns the
|
||||||
benefit of faster execution might be offset by a much slower compilation time.
|
benefit of faster execution might be offset by a much slower compilation time.
|
||||||
Most, but not all patterns can be optimized by the JIT compiler.
|
Most, but not all patterns can be optimized by the JIT compiler.
|
||||||
<a name="localesupport"></a></P>
|
<a name="localesupport"></a></P>
|
||||||
<br><a name="SEC19" href="#TOC1">LOCALE SUPPORT</a><br>
|
<br><a name="SEC20" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 handles caseless matching, and determines whether characters are letters,
|
PCRE2 handles caseless matching, and determines whether characters are letters,
|
||||||
digits, or whatever, by reference to a set of tables, indexed by character code
|
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||||
|
@ -1371,7 +1388,7 @@ is saved with the compiled pattern, and the same tables are used by
|
||||||
compilation, and matching all happen in the same locale, but different patterns
|
compilation, and matching all happen in the same locale, but different patterns
|
||||||
can be processed in different locales.
|
can be processed in different locales.
|
||||||
<a name="infoaboutpattern"></a></P>
|
<a name="infoaboutpattern"></a></P>
|
||||||
<br><a name="SEC20" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
<br><a name="SEC21" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
|
@ -1660,7 +1677,7 @@ getting memory in which to place the compiled data is the value returned by
|
||||||
this option plus the size of the <b>pcre2_code</b> structure. Processing a
|
this option plus the size of the <b>pcre2_code</b> structure. Processing a
|
||||||
pattern with the JIT compiler does not alter the value returned by this option.
|
pattern with the JIT compiler does not alter the value returned by this option.
|
||||||
<a name="matchdatablock"></a></P>
|
<a name="matchdatablock"></a></P>
|
||||||
<br><a name="SEC21" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
<br><a name="SEC22" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
|
@ -1712,7 +1729,7 @@ and
|
||||||
<a href="#matchotherdata">other match data</a>
|
<a href="#matchotherdata">other match data</a>
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC22" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -1926,7 +1943,7 @@ examples, in the
|
||||||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
<br><a name="SEC24" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||||
<P>
|
<P>
|
||||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||||
standard convention for the operating system. The default can be overridden in
|
standard convention for the operating system. The default can be overridden in
|
||||||
|
@ -1961,7 +1978,7 @@ LF in the characters that it matches.
|
||||||
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
||||||
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||||
<a name="matchedstrings"></a></P>
|
<a name="matchedstrings"></a></P>
|
||||||
<br><a name="SEC24" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC25" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2051,7 +2068,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
|
||||||
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
||||||
had.
|
had.
|
||||||
<a name="matchotherdata"></a></P>
|
<a name="matchotherdata"></a></P>
|
||||||
<br><a name="SEC25" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
<br><a name="SEC26" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -2081,7 +2098,7 @@ UTF character when UTF checking fails. Details are given in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
<a name="errorlist"></a></P>
|
<a name="errorlist"></a></P>
|
||||||
<br><a name="SEC26" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
<br><a name="SEC27" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||||
<P>
|
<P>
|
||||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||||
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
||||||
|
@ -2190,7 +2207,7 @@ is attempted.
|
||||||
</pre>
|
</pre>
|
||||||
The internal recursion limit was reached.
|
The internal recursion limit was reached.
|
||||||
<a name="extractbynumber"></a></P>
|
<a name="extractbynumber"></a></P>
|
||||||
<br><a name="SEC27" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
<br><a name="SEC28" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
<b> unsigned int <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||||
|
@ -2262,7 +2279,7 @@ no capturing group of that number in the pattern, or because the group with
|
||||||
that number did not participate in the match, or because the ovector was too
|
that number did not participate in the match, or because the ovector was too
|
||||||
small to capture that group.
|
small to capture that group.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC28" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC29" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||||
|
@ -2297,7 +2314,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
||||||
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
||||||
substrings.
|
substrings.
|
||||||
<a name="extractbyname"></a></P>
|
<a name="extractbyname"></a></P>
|
||||||
<br><a name="SEC29" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||||
|
@ -2349,7 +2366,7 @@ names are not included in the compiled code. The matching process uses only
|
||||||
numbers. For this reason, the use of different names for subpatterns of the
|
numbers. For this reason, the use of different names for subpatterns of the
|
||||||
same number causes an error at compile time.
|
same number causes an error at compile time.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC30" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
<br><a name="SEC31" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2410,7 +2427,7 @@ straight back. PCRE2_ERROR_BADREPLACEMENT is returned for an invalid
|
||||||
replacement string (unrecognized sequence following a dollar sign), and
|
replacement string (unrecognized sequence following a dollar sign), and
|
||||||
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC31" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
<br><a name="SEC32" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||||
|
@ -2455,7 +2472,7 @@ The format of the name table is described above in the section entitled
|
||||||
Given all the relevant entries for the name, you can extract each of their
|
Given all the relevant entries for the name, you can extract each of their
|
||||||
numbers, and hence the captured data.
|
numbers, and hence the captured data.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC32" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
<br><a name="SEC33" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||||
<P>
|
<P>
|
||||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||||
when it finds the first match at a given point in the subject. If you want to
|
when it finds the first match at a given point in the subject. If you want to
|
||||||
|
@ -2473,7 +2490,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
||||||
other alternatives. Ultimately, when it runs out of matches,
|
other alternatives. Ultimately, when it runs out of matches,
|
||||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||||
<a name="dfamatch"></a></P>
|
<a name="dfamatch"></a></P>
|
||||||
<br><a name="SEC33" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
<br><a name="SEC34" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||||
|
@ -2647,13 +2664,13 @@ some plausibility checks are made on the contents of the workspace, which
|
||||||
should contain data about the previous partial match. If any of these checks
|
should contain data about the previous partial match. If any of these checks
|
||||||
fail, this error is given.
|
fail, this error is given.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC34" href="#TOC1">SEE ALSO</a><br>
|
<br><a name="SEC35" href="#TOC1">SEE ALSO</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
|
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
|
||||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||||
<b>pcre2sample</b>(3), <b>pcre2stack</b>(3), <b>pcre2unicode</b>(3).
|
<b>pcre2sample</b>(3), <b>pcre2stack</b>(3), <b>pcre2unicode</b>(3).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC35" href="#TOC1">AUTHOR</a><br>
|
<br><a name="SEC36" href="#TOC1">AUTHOR</a><br>
|
||||||
<P>
|
<P>
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
<br>
|
<br>
|
||||||
|
@ -2662,9 +2679,9 @@ University Computing Service
|
||||||
Cambridge, England.
|
Cambridge, England.
|
||||||
<br>
|
<br>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC36" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC37" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 November 2014
|
Last updated: 26 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -26,7 +26,7 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<b>#include <pcre2.h></b>
|
<b>#include <pcre2.h></b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>int (*pcre2_callout)(pcre2_callout_block *);</b>
|
<b>int (*pcre2_callout)(pcre2_callout_block *, void *);</b>
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -137,14 +137,17 @@ callouts such as the example above are obeyed.
|
||||||
<P>
|
<P>
|
||||||
During matching, when PCRE2 reaches a callout point, if an external function is
|
During matching, when PCRE2 reaches a callout point, if an external function is
|
||||||
set in the match context, it is called. This applies to both normal and DFA
|
set in the match context, it is called. This applies to both normal and DFA
|
||||||
matching. The only argument to the callout function is a pointer to a
|
matching. The first argument to the callout function is a pointer to a
|
||||||
<b>pcre2_callout</b> block. This structure contains the following fields:
|
<b>pcre2_callout_block</b> structure. The second argument is the void * callout data that
|
||||||
|
was supplied when the callout was set up by calling <b>pcre2_set_callout()</b>
|
||||||
|
(see the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
documentation). The callout block structure contains the following fields:
|
||||||
<pre>
|
<pre>
|
||||||
uint32_t <i>version</i>;
|
uint32_t <i>version</i>;
|
||||||
uint32_t <i>callout_number</i>;
|
uint32_t <i>callout_number</i>;
|
||||||
uint32_t <i>capture_top</i>;
|
uint32_t <i>capture_top</i>;
|
||||||
uint32_t <i>capture_last</i>;
|
uint32_t <i>capture_last</i>;
|
||||||
void *<i>callout_data</i>;
|
|
||||||
PCRE2_SIZE *<i>offset_vector</i>;
|
PCRE2_SIZE *<i>offset_vector</i>;
|
||||||
PCRE2_SPTR <i>mark</i>;
|
PCRE2_SPTR <i>mark</i>;
|
||||||
PCRE2_SPTR <i>subject</i>;
|
PCRE2_SPTR <i>subject</i>;
|
||||||
|
@ -203,14 +206,6 @@ substrings have been captured, the value of <i>capture_last</i> is 0. This is
|
||||||
always the case for the DFA matching functions.
|
always the case for the DFA matching functions.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <i>callout_data</i> field contains a value that is passed to a matching
|
|
||||||
function specifically so that it can be passed back in callouts. It is set in
|
|
||||||
the match context when the callout is set up by calling
|
|
||||||
<b>pcre2_set_callout()</b> (see the
|
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
|
||||||
documentation).
|
|
||||||
</P>
|
|
||||||
<P>
|
|
||||||
The <i>pattern_position</i> field contains the offset to the next item to be
|
The <i>pattern_position</i> field contains the offset to the next item to be
|
||||||
matched in the pattern string.
|
matched in the pattern string.
|
||||||
</P>
|
</P>
|
||||||
|
@ -259,7 +254,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC7" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 23 November 2014
|
Last updated: 25 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -32,6 +32,21 @@ However, the speed of execution is slower. In the 32-bit library, the internal
|
||||||
linkage size is always 4.
|
linkage size is always 4.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
The maximum length (in code units) of a subject string is one less than the
|
||||||
|
largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned
|
||||||
|
integer type, usually defined as size_t. Its maximum value (that is
|
||||||
|
~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated strings
|
||||||
|
and unset offsets.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that when using the traditional matching function, PCRE2 uses recursion to
|
||||||
|
handle subpatterns and indefinite repetition. This means that the available
|
||||||
|
stack space may limit the size of a subject string that can be processed by
|
||||||
|
certain patterns. For a discussion of stack issues, see the
|
||||||
|
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
||||||
|
documentation.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
All values in repeating quantifiers must be less than 65536.
|
All values in repeating quantifiers must be less than 65536.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -55,16 +70,6 @@ maximum number of named subpatterns is 10000.
|
||||||
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
The maximum length of a name in a (*MARK), (*PRUNE), (*SKIP), or (*THEN) verb
|
||||||
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
is 255 for the 8-bit library and 65535 for the 16-bit and 32-bit libraries.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
|
||||||
The maximum length of a subject string is the largest number a PCRE2_SIZE
|
|
||||||
variable can hold. PCRE2_SIZE is an unsigned integer type, usually defined as
|
|
||||||
size_t. However, when using the traditional matching function, PCRE2 uses
|
|
||||||
recursion to handle subpatterns and indefinite repetition. This means that the
|
|
||||||
available stack space may limit the size of a subject string that can be
|
|
||||||
processed by certain patterns. For a discussion of stack issues, see the
|
|
||||||
<a href="pcre2stack.html"><b>pcre2stack</b></a>
|
|
||||||
documentation.
|
|
||||||
</P>
|
|
||||||
<br><b>
|
<br><b>
|
||||||
AUTHOR
|
AUTHOR
|
||||||
</b><br>
|
</b><br>
|
||||||
|
@ -80,7 +85,7 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 29 September 2014
|
Last updated: 25 November 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
177
doc/pcre2.txt
177
doc/pcre2.txt
|
@ -248,7 +248,7 @@ PCRE2 NATIVE API COMPILE CONTEXT FUNCTIONS
|
||||||
uint32_t value);
|
uint32_t value);
|
||||||
|
|
||||||
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard_function)(uint32_t));
|
int (*guard_function)(uint32_t, void *), void *user_data);
|
||||||
|
|
||||||
|
|
||||||
PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
|
PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
|
||||||
|
@ -262,7 +262,7 @@ PCRE2 NATIVE API MATCH CONTEXT FUNCTIONS
|
||||||
void pcre2_match_context_free(pcre2_match_context *mcontext);
|
void pcre2_match_context_free(pcre2_match_context *mcontext);
|
||||||
|
|
||||||
int pcre2_set_callout(pcre2_match_context *mcontext,
|
int pcre2_set_callout(pcre2_match_context *mcontext,
|
||||||
int (*callout_function)(pcre2_callout_block *),
|
int (*callout_function)(pcre2_callout_block *, void *),
|
||||||
void *callout_data);
|
void *callout_data);
|
||||||
|
|
||||||
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
int pcre2_set_match_limit(pcre2_match_context *mcontext,
|
||||||
|
@ -492,6 +492,17 @@ PCRE2 API OVERVIEW
|
||||||
which PCRE2 was built (pcre2_config()).
|
which PCRE2 was built (pcre2_config()).
|
||||||
|
|
||||||
|
|
||||||
|
STRING LENGTHS AND OFFSETS
|
||||||
|
|
||||||
|
The PCRE2 API uses string lengths and offsets into strings of code
|
||||||
|
units in several places. These values are always of type PCRE2_SIZE,
|
||||||
|
which is an unsigned integer type, currently always defined as size_t.
|
||||||
|
The largest value that can be stored in such a type (that is
|
||||||
|
~(PCRE2_SIZE)0) is reserved as a special indicator for zero-terminated
|
||||||
|
strings and unset offsets. Therefore, the longest string that can be
|
||||||
|
handled is one less than this maximum.
|
||||||
|
|
||||||
|
|
||||||
NEWLINES
|
NEWLINES
|
||||||
|
|
||||||
PCRE2 supports five different conventions for indicating line breaks in
|
PCRE2 supports five different conventions for indicating line breaks in
|
||||||
|
@ -694,16 +705,20 @@ PCRE2 CONTEXTS
|
||||||
rogue patterns using up too much system stack when being compiled.
|
rogue patterns using up too much system stack when being compiled.
|
||||||
|
|
||||||
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
int pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard_function)(uint32_t));
|
int (*guard_function)(uint32_t, void *), void *user_data);
|
||||||
|
|
||||||
There is at least one application that runs PCRE2 in threads with very
|
There is at least one application that runs PCRE2 in threads with very
|
||||||
limited system stack, where running out of stack is to be avoided at
|
limited system stack, where running out of stack is to be avoided at
|
||||||
all costs. The parenthesis limit above cannot take account of how much
|
all costs. The parenthesis limit above cannot take account of how much
|
||||||
stack is actually available. For finer control, you can supply a
|
stack is actually available. For finer control, you can supply a
|
||||||
function that is called whenever pcre2_compile() starts to compile a
|
function that is called whenever pcre2_compile() starts to compile a
|
||||||
parenthesized part of a pattern. The argument to the function gives the
|
parenthesized part of a pattern. This function can check the actual
|
||||||
current depth of nesting. The function should return zero if all is
|
stack size (or anything else that it wants to, of course).
|
||||||
well, or non-zero to force an error.
|
|
||||||
|
The first argument to the guard function gives the current depth of
|
||||||
|
nesting, and the second is user data that is set up by the last argu-
|
||||||
|
ment of pcre2_set_compile_recursion_guard(). The guard function
|
||||||
|
should return zero if all is well, or non-zero to force an error.
|
||||||
|
|
||||||
The match context
|
The match context
|
||||||
|
|
||||||
|
@ -734,7 +749,7 @@ PCRE2 CONTEXTS
|
||||||
on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
|
on success, or PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
|
|
||||||
int pcre2_set_callout(pcre2_match_context *mcontext,
|
int pcre2_set_callout(pcre2_match_context *mcontext,
|
||||||
int (*callout_function)(pcre2_callout_block *),
|
int (*callout_function)(pcre2_callout_block *, void *),
|
||||||
void *callout_data);
|
void *callout_data);
|
||||||
|
|
||||||
This sets up a "callout" function, which PCRE2 will call at specified
|
This sets up a "callout" function, which PCRE2 will call at specified
|
||||||
|
@ -853,16 +868,16 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
|
|
||||||
PCRE2_CONFIG_BSR
|
PCRE2_CONFIG_BSR
|
||||||
|
|
||||||
The output is an integer whose value indicates what character sequences
|
The output is a uint32_t integer whose value indicates what character
|
||||||
the \R escape sequence matches by default. A value of PCRE2_BSR_UNICODE
|
sequences the \R escape sequence matches by default. A value of
|
||||||
means that \R matches any Unicode line ending sequence; a value of
|
PCRE2_BSR_UNICODE means that \R matches any Unicode line ending
|
||||||
PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The
|
sequence; a value of PCRE2_BSR_ANYCRLF means that \R matches only CR,
|
||||||
default can be overridden when a pattern is compiled.
|
LF, or CRLF. The default can be overridden when a pattern is compiled.
|
||||||
|
|
||||||
PCRE2_CONFIG_JIT
|
PCRE2_CONFIG_JIT
|
||||||
|
|
||||||
The output is an integer that is set to one if support for just-in-time
|
The output is a uint32_t integer that is set to one if support for
|
||||||
compiling is available; otherwise it is set to zero.
|
just-in-time compiling is available; otherwise it is set to zero.
|
||||||
|
|
||||||
PCRE2_CONFIG_JITTARGET
|
PCRE2_CONFIG_JITTARGET
|
||||||
|
|
||||||
|
@ -877,13 +892,13 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
|
|
||||||
PCRE2_CONFIG_LINKSIZE
|
PCRE2_CONFIG_LINKSIZE
|
||||||
|
|
||||||
The output is an integer that contains the number of bytes used for
|
The output is a uint32_t integer that contains the number of bytes used
|
||||||
internal linkage in compiled regular expressions. When PCRE2 is config-
|
for internal linkage in compiled regular expressions. When PCRE2 is
|
||||||
ured, the value can be set to 2, 3, or 4, with the default being 2.
|
configured, the value can be set to 2, 3, or 4, with the default being
|
||||||
This is the value that is returned by pcre2_config(). However, when the
|
2. This is the value that is returned by pcre2_config(). However, when
|
||||||
16-bit library is compiled, a value of 3 is rounded up to 4, and when
|
the 16-bit library is compiled, a value of 3 is rounded up to 4, and
|
||||||
the 32-bit library is compiled, internal linkages always use 4 bytes,
|
when the 32-bit library is compiled, internal linkages always use 4
|
||||||
so the configured value is not relevant.
|
bytes, so the configured value is not relevant.
|
||||||
|
|
||||||
The default value of 2 for the 8-bit and 16-bit libraries is sufficient
|
The default value of 2 for the 8-bit and 16-bit libraries is sufficient
|
||||||
for all but the most massive patterns, since it allows the size of the
|
for all but the most massive patterns, since it allows the size of the
|
||||||
|
@ -893,14 +908,15 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
|
|
||||||
PCRE2_CONFIG_MATCHLIMIT
|
PCRE2_CONFIG_MATCHLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the default limit for
|
The output is a uint32_t integer that gives the default limit for the
|
||||||
the number of internal matching function calls in a pcre2_match() exe-
|
number of internal matching function calls in a pcre2_match() execu-
|
||||||
cution. Further details are given with pcre2_match() below.
|
tion. Further details are given with pcre2_match() below.
|
||||||
|
|
||||||
PCRE2_CONFIG_NEWLINE
|
PCRE2_CONFIG_NEWLINE
|
||||||
|
|
||||||
The output is an integer whose value specifies the default character
|
The output is a uint32_t integer whose value specifies the default
|
||||||
sequence that is recognized as meaning "newline". The values are:
|
character sequence that is recognized as meaning "newline". The values
|
||||||
|
are:
|
||||||
|
|
||||||
PCRE2_NEWLINE_CR Carriage return (CR)
|
PCRE2_NEWLINE_CR Carriage return (CR)
|
||||||
PCRE2_NEWLINE_LF Linefeed (LF)
|
PCRE2_NEWLINE_LF Linefeed (LF)
|
||||||
|
@ -908,33 +924,34 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
PCRE2_NEWLINE_ANY Any Unicode line ending
|
PCRE2_NEWLINE_ANY Any Unicode line ending
|
||||||
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
PCRE2_NEWLINE_ANYCRLF Any of CR, LF, or CRLF
|
||||||
|
|
||||||
The default should normally correspond to the standard sequence for
|
The default should normally correspond to the standard sequence for
|
||||||
your operating system.
|
your operating system.
|
||||||
|
|
||||||
PCRE2_CONFIG_PARENSLIMIT
|
PCRE2_CONFIG_PARENSLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the maximum depth of
|
The output is a uint32_t integer that gives the maximum depth of nest-
|
||||||
nesting of parentheses (of any kind) in a pattern. This limit is
|
ing of parentheses (of any kind) in a pattern. This limit is imposed to
|
||||||
imposed to cap the amount of system stack used when a pattern is com-
|
cap the amount of system stack used when a pattern is compiled. It is
|
||||||
piled. It is specified when PCRE2 is built; the default is 250. This
|
specified when PCRE2 is built; the default is 250. This limit does not
|
||||||
limit does not take into account the stack that may already be used by
|
take into account the stack that may already be used by the calling
|
||||||
the calling application. For finer control over compilation stack
|
application. For finer control over compilation stack usage, see
|
||||||
usage, see pcre2_set_compile_recursion_guard().
|
pcre2_set_compile_recursion_guard().
|
||||||
|
|
||||||
PCRE2_CONFIG_RECURSIONLIMIT
|
PCRE2_CONFIG_RECURSIONLIMIT
|
||||||
|
|
||||||
The output is an unsigned long integer that gives the default limit for
|
The output is a uint32_t integer that gives the default limit for the
|
||||||
the depth of recursion when calling the internal matching function in a
|
depth of recursion when calling the internal matching function in a
|
||||||
pcre2_match() execution. Further details are given with pcre2_match()
|
pcre2_match() execution. Further details are given with pcre2_match()
|
||||||
below.
|
below.
|
||||||
|
|
||||||
PCRE2_CONFIG_STACKRECURSE
|
PCRE2_CONFIG_STACKRECURSE
|
||||||
|
|
||||||
The output is an integer that is set to one if internal recursion when
|
The output is a uint32_t integer that is set to one if internal recur-
|
||||||
running pcre2_match() is implemented by recursive function calls that
|
sion when running pcre2_match() is implemented by recursive function
|
||||||
use the system stack to remember their state. This is the usual way
|
calls that use the system stack to remember their state. This is the
|
||||||
that PCRE2 is compiled. The output is zero if PCRE2 was compiled to use
|
usual way that PCRE2 is compiled. The output is zero if PCRE2 was com-
|
||||||
blocks of data on the heap instead of recursive function calls.
|
piled to use blocks of data on the heap instead of recursive function
|
||||||
|
calls.
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE_VERSION
|
PCRE2_CONFIG_UNICODE_VERSION
|
||||||
|
|
||||||
|
@ -948,8 +965,8 @@ CHECKING BUILD-TIME OPTIONS
|
||||||
|
|
||||||
PCRE2_CONFIG_UNICODE
|
PCRE2_CONFIG_UNICODE
|
||||||
|
|
||||||
The output is an integer that is set to one if Unicode support is
|
The output is a uint32_t integer that is set to one if Unicode support
|
||||||
available; otherwise it is set to zero. Unicode support implies UTF
|
is available; otherwise it is set to zero. Unicode support implies UTF
|
||||||
support.
|
support.
|
||||||
|
|
||||||
PCRE2_CONFIG_VERSION
|
PCRE2_CONFIG_VERSION
|
||||||
|
@ -2605,7 +2622,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 23 November 2014
|
Last updated: 26 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3076,7 +3093,7 @@ SYNOPSIS
|
||||||
|
|
||||||
#include <pcre2.h>
|
#include <pcre2.h>
|
||||||
|
|
||||||
int (*pcre2_callout)(pcre2_callout_block *);
|
int (*pcre2_callout)(pcre2_callout_block *, void *);
|
||||||
|
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
|
@ -3183,15 +3200,16 @@ THE CALLOUT INTERFACE
|
||||||
|
|
||||||
During matching, when PCRE2 reaches a callout point, if an external
|
During matching, when PCRE2 reaches a callout point, if an external
|
||||||
function is set in the match context, it is called. This applies to
|
function is set in the match context, it is called. This applies to
|
||||||
both normal and DFA matching. The only argument to the callout function
|
both normal and DFA matching. The first argument to the callout func-
|
||||||
is a pointer to a pcre2_callout block. This structure contains the fol-
|
tion is a pointer to a pcre2_callout block. The second argument is the
|
||||||
lowing fields:
|
void * callout data that was supplied when the callout was set up by
|
||||||
|
calling pcre2_set_callout() (see the pcre2api documentation). The call-
|
||||||
|
out block structure contains the following fields:
|
||||||
|
|
||||||
uint32_t version;
|
uint32_t version;
|
||||||
uint32_t callout_number;
|
uint32_t callout_number;
|
||||||
uint32_t capture_top;
|
uint32_t capture_top;
|
||||||
uint32_t capture_last;
|
uint32_t capture_last;
|
||||||
void *callout_data;
|
|
||||||
PCRE2_SIZE *offset_vector;
|
PCRE2_SIZE *offset_vector;
|
||||||
PCRE2_SPTR mark;
|
PCRE2_SPTR mark;
|
||||||
PCRE2_SPTR subject;
|
PCRE2_SPTR subject;
|
||||||
|
@ -3242,28 +3260,23 @@ THE CALLOUT INTERFACE
|
||||||
substrings. If no substrings have been captured, the value of cap-
|
substrings. If no substrings have been captured, the value of cap-
|
||||||
ture_last is 0. This is always the case for the DFA matching functions.
|
ture_last is 0. This is always the case for the DFA matching functions.
|
||||||
|
|
||||||
The callout_data field contains a value that is passed to a matching
|
The pattern_position field contains the offset to the next item to be
|
||||||
function specifically so that it can be passed back in callouts. It is
|
|
||||||
set in the match context when the callout is set up by calling
|
|
||||||
pcre2_set_callout() (see the pcre2api documentation).
|
|
||||||
|
|
||||||
The pattern_position field contains the offset to the next item to be
|
|
||||||
matched in the pattern string.
|
matched in the pattern string.
|
||||||
|
|
||||||
The next_item_length field contains the length of the next item to be
|
The next_item_length field contains the length of the next item to be
|
||||||
matched in the pattern string. When the callout immediately precedes an
|
matched in the pattern string. When the callout immediately precedes an
|
||||||
alternation bar, a closing parenthesis, or the end of the pattern, the
|
alternation bar, a closing parenthesis, or the end of the pattern, the
|
||||||
length is zero. When the callout precedes an opening parenthesis, the
|
length is zero. When the callout precedes an opening parenthesis, the
|
||||||
length is that of the entire subpattern.
|
length is that of the entire subpattern.
|
||||||
|
|
||||||
The pattern_position and next_item_length fields are intended to help
|
The pattern_position and next_item_length fields are intended to help
|
||||||
in distinguishing between different automatic callouts, which all have
|
in distinguishing between different automatic callouts, which all have
|
||||||
the same callout number. However, they are set for all callouts.
|
the same callout number. However, they are set for all callouts.
|
||||||
|
|
||||||
In callouts from pcre2_match() the mark field contains a pointer to the
|
In callouts from pcre2_match() the mark field contains a pointer to the
|
||||||
zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
zero-terminated name of the most recently passed (*MARK), (*PRUNE), or
|
||||||
(*THEN) item in the match, or NULL if no such items have been passed.
|
(*THEN) item in the match, or NULL if no such items have been passed.
|
||||||
Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
Instances of (*PRUNE) or (*THEN) without a name do not obliterate a
|
||||||
previous (*MARK). In callouts from the DFA matching function this field
|
previous (*MARK). In callouts from the DFA matching function this field
|
||||||
always contains NULL.
|
always contains NULL.
|
||||||
|
|
||||||
|
@ -3271,16 +3284,16 @@ THE CALLOUT INTERFACE
|
||||||
RETURN VALUES
|
RETURN VALUES
|
||||||
|
|
||||||
The external callout function returns an integer to PCRE2. If the value
|
The external callout function returns an integer to PCRE2. If the value
|
||||||
is zero, matching proceeds as normal. If the value is greater than
|
is zero, matching proceeds as normal. If the value is greater than
|
||||||
zero, matching fails at the current point, but the testing of other
|
zero, matching fails at the current point, but the testing of other
|
||||||
matching possibilities goes ahead, just as if a lookahead assertion had
|
matching possibilities goes ahead, just as if a lookahead assertion had
|
||||||
failed. If the value is less than zero, the match is abandoned, and the
|
failed. If the value is less than zero, the match is abandoned, and the
|
||||||
matching function returns the negative value.
|
matching function returns the negative value.
|
||||||
|
|
||||||
Negative values should normally be chosen from the set of
|
Negative values should normally be chosen from the set of
|
||||||
PCRE2_ERROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a
|
PCRE2_ERROR_xxx values. In particular, PCRE2_ERROR_NOMATCH forces a
|
||||||
standard "no match" failure. The error number PCRE2_ERROR_CALLOUT is
|
standard "no match" failure. The error number PCRE2_ERROR_CALLOUT is
|
||||||
reserved for use by callout functions; it will never be used by PCRE2
|
reserved for use by callout functions; it will never be used by PCRE2
|
||||||
itself.
|
itself.
|
||||||
|
|
||||||
|
|
||||||
|
@ -3293,7 +3306,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 23 November 2014
|
Last updated: 25 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@ -3891,6 +3904,18 @@ SIZE AND OTHER LIMITATIONS
|
||||||
of execution is slower. In the 32-bit library, the internal linkage
|
of execution is slower. In the 32-bit library, the internal linkage
|
||||||
size is always 4.
|
size is always 4.
|
||||||
|
|
||||||
|
The maximum length (in code units) of a subject string is one less than
|
||||||
|
the largest number a PCRE2_SIZE variable can hold. PCRE2_SIZE is an
|
||||||
|
unsigned integer type, usually defined as size_t. Its maximum value
|
||||||
|
(that is ~(PCRE2_SIZE)0) is reserved as a special indicator for zero-
|
||||||
|
terminated strings and unset offsets.
|
||||||
|
|
||||||
|
Note that when using the traditional matching function, PCRE2 uses
|
||||||
|
recursion to handle subpatterns and indefinite repetition. This means
|
||||||
|
that the available stack space may limit the size of a subject string
|
||||||
|
that can be processed by certain patterns. For a discussion of stack
|
||||||
|
issues, see the pcre2stack documentation.
|
||||||
|
|
||||||
All values in repeating quantifiers must be less than 65536.
|
All values in repeating quantifiers must be less than 65536.
|
||||||
|
|
||||||
There is no limit to the number of parenthesized subpatterns, but there
|
There is no limit to the number of parenthesized subpatterns, but there
|
||||||
|
@ -3913,14 +3938,6 @@ SIZE AND OTHER LIMITATIONS
|
||||||
(*THEN) verb is 255 for the 8-bit library and 65535 for the 16-bit and
|
(*THEN) verb is 255 for the 8-bit library and 65535 for the 16-bit and
|
||||||
32-bit libraries.
|
32-bit libraries.
|
||||||
|
|
||||||
The maximum length of a subject string is the largest number a
|
|
||||||
PCRE2_SIZE variable can hold. PCRE2_SIZE is an unsigned integer type,
|
|
||||||
usually defined as size_t. However, when using the traditional matching
|
|
||||||
function, PCRE2 uses recursion to handle subpatterns and indefinite
|
|
||||||
repetition. This means that the available stack space may limit the
|
|
||||||
size of a subject string that can be processed by certain patterns. For
|
|
||||||
a discussion of stack issues, see the pcre2stack documentation.
|
|
||||||
|
|
||||||
|
|
||||||
AUTHOR
|
AUTHOR
|
||||||
|
|
||||||
|
@ -3931,7 +3948,7 @@ AUTHOR
|
||||||
|
|
||||||
REVISION
|
REVISION
|
||||||
|
|
||||||
Last updated: 29 September 2014
|
Last updated: 25 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
------------------------------------------------------------------------------
|
------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.PP
|
.PP
|
||||||
.nf
|
.nf
|
||||||
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " int (*\fIguard_function\fP)(uint32_t));"
|
.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);"
|
||||||
.fi
|
.fi
|
||||||
.
|
.
|
||||||
.SH DESCRIPTION
|
.SH DESCRIPTION
|
||||||
|
@ -16,11 +16,12 @@ PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
This function defines, within a compile context, a function that is called
|
This function defines, within a compile context, a function that is called
|
||||||
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
||||||
pattern. The argument to the function gives the current depth of parenthesis
|
pattern. The first argument to the function gives the current depth of
|
||||||
nesting. The function should return zero if all is well, or non-zero to force
|
parenthesis nesting, and the second is user data that is supplied when the
|
||||||
an error. This feature is provided so that applications can check the available
|
function is set up. The guard function should return zero if all is well, or
|
||||||
system stack space, in order to avoid running out. The result of this function
|
non-zero to force an error. This feature is provided so that applications can
|
||||||
is always zero.
|
check the available system stack space, in order to avoid running out. The
|
||||||
|
result of \fBpcre2_set_compile_recursion_guard()\fP is always zero.
|
||||||
.P
|
.P
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
.\" HREF
|
.\" HREF
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "25 November 2014" "PCRE2 10.00"
|
.TH PCRE2API 3 "26 November 2014" "PCRE2 10.00"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -97,7 +97,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.B " uint32_t \fIvalue\fP);"
|
.B " uint32_t \fIvalue\fP);"
|
||||||
.sp
|
.sp
|
||||||
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " int (*\fIguard_function\fP)(uint32_t));"
|
.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);"
|
||||||
.fi
|
.fi
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
@ -375,11 +375,11 @@ PCRE2 was built (\fBpcre2_config()\fP).
|
||||||
.sp
|
.sp
|
||||||
The PCRE2 API uses string lengths and offsets into strings of code units in
|
The PCRE2 API uses string lengths and offsets into strings of code units in
|
||||||
several places. These values are always of type PCRE2_SIZE, which is an
|
several places. These values are always of type PCRE2_SIZE, which is an
|
||||||
unsigned integer type, currently always defined as \fIsize_t\fP. The largest
|
unsigned integer type, currently always defined as \fIsize_t\fP. The largest
|
||||||
value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved
|
value that can be stored in such a type (that is ~(PCRE2_SIZE)0) is reserved
|
||||||
as a special indicator for zero-terminated strings and unset offsets.
|
as a special indicator for zero-terminated strings and unset offsets.
|
||||||
Therefore, the longest string that can be handled is one less than this
|
Therefore, the longest string that can be handled is one less than this
|
||||||
maximum.
|
maximum.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="newlines"></a>
|
.\" HTML <a name="newlines"></a>
|
||||||
|
@ -612,7 +612,7 @@ using up too much system stack when being compiled.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_compile_recursion_guard(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " int (*\fIguard_function\fP)(uint32_t));"
|
.B " int (*\fIguard_function\fP)(uint32_t, void *), void *\fIuser_data\fP);"
|
||||||
.fi
|
.fi
|
||||||
.sp
|
.sp
|
||||||
There is at least one application that runs PCRE2 in threads with very limited
|
There is at least one application that runs PCRE2 in threads with very limited
|
||||||
|
@ -620,8 +620,13 @@ system stack, where running out of stack is to be avoided at all costs. The
|
||||||
parenthesis limit above cannot take account of how much stack is actually
|
parenthesis limit above cannot take account of how much stack is actually
|
||||||
available. For finer control, you can supply a function that is called
|
available. For finer control, you can supply a function that is called
|
||||||
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
whenever \fBpcre2_compile()\fP starts to compile a parenthesized part of a
|
||||||
pattern. The argument to the function gives the current depth of nesting. The
|
pattern. This function can check the actual stack size (or anything else that
|
||||||
function should return zero if all is well, or non-zero to force an error.
|
it wants to, of course).
|
||||||
|
.P
|
||||||
|
The first argument to the guard function gives the current depth of
|
||||||
|
nesting, and the second is user data that is set up by the last argument of
|
||||||
|
\fBpcre2_set_compile_recursion_guard()\fP. The guard function should return
|
||||||
|
zero if all is well, or non-zero to force an error.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="matchcontext"></a>
|
.\" HTML <a name="matchcontext"></a>
|
||||||
|
@ -2726,6 +2731,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 25 November 2014
|
Last updated: 26 November 2014
|
||||||
Copyright (c) 1997-2014 University of Cambridge.
|
Copyright (c) 1997-2014 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
41
maint/README
41
maint/README
|
@ -37,11 +37,11 @@ pcre2_chartables.c.non-standard
|
||||||
|
|
||||||
README This file.
|
README This file.
|
||||||
|
|
||||||
Unicode.tables The files in this directory (CaseFolding.txt,
|
Unicode.tables The files in this directory (CaseFolding.txt,
|
||||||
DerivedGeneralCategory.txt, GraphemeBreakProperty.txt,
|
DerivedGeneralCategory.txt, GraphemeBreakProperty.txt,
|
||||||
Scripts.txt and UnicodeData.txt) were downloaded from the
|
Scripts.txt and UnicodeData.txt) were downloaded from the
|
||||||
Unicode web site. They contain information about Unicode
|
Unicode web site. They contain information about Unicode
|
||||||
characters and scripts.
|
characters and scripts.
|
||||||
|
|
||||||
ucptest.c A short C program for testing the Unicode property macros
|
ucptest.c A short C program for testing the Unicode property macros
|
||||||
that do lookups in the pcre2_ucd.c data, mainly useful after
|
that do lookups in the pcre2_ucd.c data, mainly useful after
|
||||||
|
@ -87,21 +87,21 @@ Note also that both the pcre2syntax.3 and pcre2pattern.3 man pages contain
|
||||||
lists of Unicode script names.
|
lists of Unicode script names.
|
||||||
|
|
||||||
|
|
||||||
Preparing for a PCRE release
|
Preparing for a PCRE2 release
|
||||||
============================
|
=============================
|
||||||
|
|
||||||
This section contains a checklist of things that I consult before building a
|
This section contains a checklist of things that I consult before building a
|
||||||
distribution for a new release.
|
distribution for a new release.
|
||||||
|
|
||||||
. Ensure that the version number and version date are correct in configure.ac.
|
. Ensure that the version number and version date are correct in configure.ac.
|
||||||
|
|
||||||
. Update the library version numbers in configure.ac according to the rules
|
. Update the library version numbers in configure.ac according to the rules
|
||||||
given below.
|
given below.
|
||||||
|
|
||||||
. If new build options have been added, ensure that they are added to the CMake
|
. If new build options have been added, ensure that they are added to the CMake
|
||||||
files as well as to the autoconf files. The relevant files are CMakeLists.txt
|
files as well as to the autoconf files. The relevant files are CMakeLists.txt
|
||||||
and config-cmake.h.in. After making a release tarball, test it out with CMake
|
and config-cmake.h.in. After making a release tarball, test it out with CMake
|
||||||
if there have been changes here.
|
if there have been changes here.
|
||||||
|
|
||||||
. Run ./autogen.sh to ensure everything is up-to-date.
|
. Run ./autogen.sh to ensure everything is up-to-date.
|
||||||
|
|
||||||
|
@ -112,7 +112,7 @@ distribution for a new release.
|
||||||
different configurations, and it also runs some of them with valgrind, all of
|
different configurations, and it also runs some of them with valgrind, all of
|
||||||
which can take quite some time.
|
which can take quite some time.
|
||||||
|
|
||||||
. Run perltest.sh on the test data for tests 1 and 4. The output should match
|
. Run perltest.sh on the test data for tests 1 and 4. The output should match
|
||||||
the PCRE2 test output, apart from the version identification at the start of
|
the PCRE2 test output, apart from the version identification at the start of
|
||||||
each test. The other tests are not Perl-compatible (they use various
|
each test. The other tests are not Perl-compatible (they use various
|
||||||
PCRE2-specific features or options).
|
PCRE2-specific features or options).
|
||||||
|
@ -122,7 +122,7 @@ distribution for a new release.
|
||||||
may see a number of "pcre2_memmove defined but not used" warnings for the
|
may see a number of "pcre2_memmove defined but not used" warnings for the
|
||||||
modules in which there is no call to memmove(). These can be ignored.
|
modules in which there is no call to memmove(). These can be ignored.
|
||||||
|
|
||||||
. Documentation: check AUTHORS, ChangeLog (check version and date), LICENCE,
|
. Documentation: check AUTHORS, ChangeLog (check version and date), LICENCE,
|
||||||
NEWS (check version and date), NON-AUTOTOOLS-BUILD, and README. Many of these
|
NEWS (check version and date), NON-AUTOTOOLS-BUILD, and README. Many of these
|
||||||
won't need changing, but over the long term things do change.
|
won't need changing, but over the long term things do change.
|
||||||
|
|
||||||
|
@ -133,15 +133,15 @@ distribution for a new release.
|
||||||
pcre2test to increase the stack size for test 2. Since I retired I can no
|
pcre2test to increase the stack size for test 2. Since I retired I can no
|
||||||
longer do this, but instead I rely on putting out release candidates for
|
longer do this, but instead I rely on putting out release candidates for
|
||||||
folks on the pcre-dev list to test.
|
folks on the pcre-dev list to test.
|
||||||
|
|
||||||
. The buildbots at http://buildfarm.opencsw.org/ do some automated testing
|
. The buildbots at http://buildfarm.opencsw.org/ do some automated testing
|
||||||
of PCRE2 and should be checked before putting out a release.
|
of PCRE2 and should be checked before putting out a release.
|
||||||
|
|
||||||
|
|
||||||
Updating version info for libtool
|
Updating version info for libtool
|
||||||
=================================
|
=================================
|
||||||
|
|
||||||
This set of rules for updating library version information came from a web page
|
This set of rules for updating library version information came from a web page
|
||||||
whose URL I have forgotten. The version information consists of three parts:
|
whose URL I have forgotten. The version information consists of three parts:
|
||||||
(current, revision, age).
|
(current, revision, age).
|
||||||
|
|
||||||
|
@ -194,7 +194,7 @@ and the zipball. Double-check with "svn status", then create an SVN tagged
|
||||||
copy:
|
copy:
|
||||||
|
|
||||||
svn copy svn://vcs.exim.org/pcre2/code/trunk \
|
svn copy svn://vcs.exim.org/pcre2/code/trunk \
|
||||||
svn://vcs.exim.org/pcre2/code/tags/pcre-10.xx
|
svn://vcs.exim.org/pcre2/code/tags/pcre2-10.xx
|
||||||
|
|
||||||
When the new release is out, don't forget to tell webmaster@pcre.org and the
|
When the new release is out, don't forget to tell webmaster@pcre.org and the
|
||||||
mailing list. Also, update the list of version numbers in Bugzilla (edit
|
mailing list. Also, update the list of version numbers in Bugzilla (edit
|
||||||
|
@ -255,7 +255,7 @@ very sensible; some are rather wacky. Some have been on this list for years.
|
||||||
|
|
||||||
. An option to convert results into character offsets and character lengths.
|
. An option to convert results into character offsets and character lengths.
|
||||||
|
|
||||||
. An option for pcre2grep to scan only the start of a file. I am not keen -
|
. An option for pcre2grep to scan only the start of a file. I am not keen -
|
||||||
this is the job of "head".
|
this is the job of "head".
|
||||||
|
|
||||||
. A (non-Unix) user wanted pcregrep options to (a) list a file name just once,
|
. A (non-Unix) user wanted pcregrep options to (a) list a file name just once,
|
||||||
|
@ -282,14 +282,14 @@ very sensible; some are rather wacky. Some have been on this list for years.
|
||||||
|
|
||||||
. Callouts with arguments: (?Cn:ARG) for instance.
|
. Callouts with arguments: (?Cn:ARG) for instance.
|
||||||
|
|
||||||
. Write a function that generates random matching strings for a compiled
|
. Write a function that generates random matching strings for a compiled
|
||||||
pattern.
|
pattern.
|
||||||
|
|
||||||
. Pcre2grep: an option to specify the output line separator, either as a string
|
. Pcre2grep: an option to specify the output line separator, either as a string
|
||||||
or select from a fixed list. This is not straightforward, because at the
|
or select from a fixed list. This is not straightforward, because at the
|
||||||
moment it outputs whatever is in the input file.
|
moment it outputs whatever is in the input file.
|
||||||
|
|
||||||
. Improve the code for duplicate checking in pcre_dfa_match(). An incomplete,
|
. Improve the code for duplicate checking in pcre2_dfa_match(). An incomplete,
|
||||||
non-thread-safe patch showed that this can help performance for patterns
|
non-thread-safe patch showed that this can help performance for patterns
|
||||||
where there are many alternatives. However, a simple thread-safe
|
where there are many alternatives. However, a simple thread-safe
|
||||||
implementation that I tried made things worse in many simple cases, so this
|
implementation that I tried made things worse in many simple cases, so this
|
||||||
|
@ -303,7 +303,12 @@ very sensible; some are rather wacky. Some have been on this list for years.
|
||||||
. Instead of having #ifdef HAVE_CONFIG_H in each module, put #include
|
. Instead of having #ifdef HAVE_CONFIG_H in each module, put #include
|
||||||
"something" and then the #ifdef appears only in one place, in "something".
|
"something" and then the #ifdef appears only in one place, in "something".
|
||||||
|
|
||||||
|
. Implement something like (?(R2+)... to check outer recursions.
|
||||||
|
|
||||||
|
. If Perl ever supports the POSIX notation [[.something.]] PCRE2 should try
|
||||||
|
to follow.
|
||||||
|
|
||||||
Philip Hazel
|
Philip Hazel
|
||||||
Email local part: ph10
|
Email local part: ph10
|
||||||
Email domain: cam.ac.uk
|
Email domain: cam.ac.uk
|
||||||
Last updated: 18 November 2014
|
Last updated: 26 November 2014
|
||||||
|
|
|
@ -368,7 +368,8 @@ PCRE2_EXP_DECL int pcre2_set_newline(pcre2_compile_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
PCRE2_EXP_DECL int pcre2_set_parens_nest_limit(pcre2_compile_context *, \
|
||||||
uint32_t); \
|
uint32_t); \
|
||||||
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
PCRE2_EXP_DECL int pcre2_set_compile_recursion_guard(\
|
||||||
pcre2_compile_context *, int (*)(uint32_t)); \
|
pcre2_compile_context *, int (*)(uint32_t, void *), \
|
||||||
|
void *);
|
||||||
|
|
||||||
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
#define PCRE2_MATCH_CONTEXT_FUNCTIONS \
|
||||||
PCRE2_EXP_DECL \
|
PCRE2_EXP_DECL \
|
||||||
|
|
|
@ -6646,7 +6646,8 @@ branch_chain bc;
|
||||||
|
|
||||||
/* If set, call the external function that checks for stack availability. */
|
/* If set, call the external function that checks for stack availability. */
|
||||||
|
|
||||||
if (cb->cx->stack_guard != NULL && cb->cx->stack_guard(cb->parens_depth))
|
if (cb->cx->stack_guard != NULL &&
|
||||||
|
cb->cx->stack_guard(cb->parens_depth, cb->cx->stack_guard_data))
|
||||||
{
|
{
|
||||||
*errorcodeptr= ERR33;
|
*errorcodeptr= ERR33;
|
||||||
return FALSE;
|
return FALSE;
|
||||||
|
|
|
@ -133,6 +133,7 @@ when no context is supplied to the compile function. */
|
||||||
const pcre2_compile_context PRIV(default_compile_context) = {
|
const pcre2_compile_context PRIV(default_compile_context) = {
|
||||||
{ default_malloc, default_free, NULL },
|
{ default_malloc, default_free, NULL },
|
||||||
NULL,
|
NULL,
|
||||||
|
NULL,
|
||||||
PRIV(default_tables),
|
PRIV(default_tables),
|
||||||
BSR_DEFAULT,
|
BSR_DEFAULT,
|
||||||
NEWLINE_DEFAULT,
|
NEWLINE_DEFAULT,
|
||||||
|
@ -320,9 +321,10 @@ return 0;
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard)(uint32_t))
|
int (*guard)(uint32_t, void *), void *user_data)
|
||||||
{
|
{
|
||||||
ccontext->stack_guard = guard;
|
ccontext->stack_guard = guard;
|
||||||
|
ccontext->stack_guard_data = user_data;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2623,7 +2623,7 @@ for (;;)
|
||||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||||
cb.pattern_position = GET(code, LINK_SIZE + 3);
|
cb.pattern_position = GET(code, LINK_SIZE + 3);
|
||||||
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
|
cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
|
||||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||||
return rrc; /* Abandon */
|
return rrc; /* Abandon */
|
||||||
}
|
}
|
||||||
if (rrc > 0) break; /* Fail this thread */
|
if (rrc > 0) break; /* Fail this thread */
|
||||||
|
@ -2970,7 +2970,7 @@ for (;;)
|
||||||
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
cb.current_position = (PCRE2_SIZE)(ptr - start_subject);
|
||||||
cb.pattern_position = GET(code, 2);
|
cb.pattern_position = GET(code, 2);
|
||||||
cb.next_item_length = GET(code, 2 + LINK_SIZE);
|
cb.next_item_length = GET(code, 2 + LINK_SIZE);
|
||||||
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
if ((rrc = (mb->callout)(&cb, mb->callout_data)) < 0)
|
||||||
return rrc; /* Abandon */
|
return rrc; /* Abandon */
|
||||||
}
|
}
|
||||||
if (rrc == 0)
|
if (rrc == 0)
|
||||||
|
|
|
@ -1877,10 +1877,10 @@ is available. */
|
||||||
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
#define _pcre2_was_newline PCRE2_SUFFIX(_pcre2_was_newline_)
|
||||||
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
#define _pcre2_xclass PCRE2_SUFFIX(_pcre2_xclass_)
|
||||||
|
|
||||||
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
extern void _pcre2_auto_possessify(PCRE2_UCHAR *, BOOL,
|
||||||
const compile_block *);
|
const compile_block *);
|
||||||
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
extern PCRE2_SPTR _pcre2_find_bracket(PCRE2_SPTR, BOOL, int);
|
||||||
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
extern BOOL _pcre2_is_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
uint32_t *, BOOL);
|
uint32_t *, BOOL);
|
||||||
extern void _pcre2_jit_free(void *, pcre2_memctl *);
|
extern void _pcre2_jit_free(void *, pcre2_memctl *);
|
||||||
extern size_t _pcre2_jit_get_size(void *);
|
extern size_t _pcre2_jit_get_size(void *);
|
||||||
|
@ -1895,7 +1895,7 @@ extern int _pcre2_strncmp(PCRE2_SPTR, PCRE2_SPTR, size_t);
|
||||||
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
extern int _pcre2_strncmp_c8(PCRE2_SPTR, const char *, size_t);
|
||||||
extern int _pcre2_study(pcre2_real_code *);
|
extern int _pcre2_study(pcre2_real_code *);
|
||||||
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
extern int _pcre2_valid_utf(PCRE2_SPTR, PCRE2_SIZE, PCRE2_SIZE *);
|
||||||
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
extern BOOL _pcre2_was_newline(PCRE2_SPTR, uint32_t, PCRE2_SPTR,
|
||||||
uint32_t *, BOOL);
|
uint32_t *, BOOL);
|
||||||
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
extern BOOL _pcre2_xclass(uint32_t, PCRE2_SPTR, BOOL);
|
||||||
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||||||
|
|
|
@ -552,41 +552,42 @@ code that uses them is simpler because it assumes this. */
|
||||||
memory control. */
|
memory control. */
|
||||||
|
|
||||||
typedef struct pcre2_real_general_context {
|
typedef struct pcre2_real_general_context {
|
||||||
pcre2_memctl memctl;
|
pcre2_memctl memctl;
|
||||||
} pcre2_real_general_context;
|
} pcre2_real_general_context;
|
||||||
|
|
||||||
/* The real compile context structure */
|
/* The real compile context structure */
|
||||||
|
|
||||||
typedef struct pcre2_real_compile_context {
|
typedef struct pcre2_real_compile_context {
|
||||||
pcre2_memctl memctl;
|
pcre2_memctl memctl;
|
||||||
int (*stack_guard)(uint32_t);
|
int (*stack_guard)(uint32_t, void *);
|
||||||
|
void *stack_guard_data;
|
||||||
const uint8_t *tables;
|
const uint8_t *tables;
|
||||||
uint16_t bsr_convention;
|
uint16_t bsr_convention;
|
||||||
uint16_t newline_convention;
|
uint16_t newline_convention;
|
||||||
uint32_t parens_nest_limit;
|
uint32_t parens_nest_limit;
|
||||||
} pcre2_real_compile_context;
|
} pcre2_real_compile_context;
|
||||||
|
|
||||||
/* The real match context structure. */
|
/* The real match context structure. */
|
||||||
|
|
||||||
typedef struct pcre2_real_match_context {
|
typedef struct pcre2_real_match_context {
|
||||||
pcre2_memctl memctl;
|
pcre2_memctl memctl;
|
||||||
#ifdef HEAP_MATCH_RECURSE
|
#ifdef HEAP_MATCH_RECURSE
|
||||||
pcre2_memctl stack_memctl;
|
pcre2_memctl stack_memctl;
|
||||||
#endif
|
#endif
|
||||||
#ifdef SUPPORT_JIT
|
#ifdef SUPPORT_JIT
|
||||||
pcre2_jit_callback jit_callback;
|
pcre2_jit_callback jit_callback;
|
||||||
void *jit_callback_data;
|
void *jit_callback_data;
|
||||||
#endif
|
#endif
|
||||||
int (*callout)(pcre2_callout_block *, void *);
|
int (*callout)(pcre2_callout_block *, void *);
|
||||||
void *callout_data;
|
void *callout_data;
|
||||||
uint32_t match_limit;
|
uint32_t match_limit;
|
||||||
uint32_t recursion_limit;
|
uint32_t recursion_limit;
|
||||||
} pcre2_real_match_context;
|
} pcre2_real_match_context;
|
||||||
|
|
||||||
/* The real compiled code structure */
|
/* The real compiled code structure */
|
||||||
|
|
||||||
typedef struct pcre2_real_code {
|
typedef struct pcre2_real_code {
|
||||||
pcre2_memctl memctl; /* Memory control fields */
|
pcre2_memctl memctl; /* Memory control fields */
|
||||||
const uint8_t *tables; /* The character tables */
|
const uint8_t *tables; /* The character tables */
|
||||||
void *executable_jit; /* Pointer to JIT code */
|
void *executable_jit; /* Pointer to JIT code */
|
||||||
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
uint8_t start_bitmap[32]; /* Bitmap for starting code unit < 256 */
|
||||||
|
|
|
@ -1319,7 +1319,7 @@ for (;;)
|
||||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||||
cb.pattern_position = GET(ecode, 2);
|
cb.pattern_position = GET(ecode, 2);
|
||||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
if (rrc < 0) RRETURN(rrc);
|
if (rrc < 0) RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
@ -1723,7 +1723,7 @@ for (;;)
|
||||||
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
cb.current_position = (PCRE2_SIZE)(eptr - mb->start_subject);
|
||||||
cb.pattern_position = GET(ecode, 2);
|
cb.pattern_position = GET(ecode, 2);
|
||||||
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
|
||||||
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
if ((rrc = mb->callout(&cb, mb->callout_data)) > 0)
|
||||||
RRETURN(MATCH_NOMATCH);
|
RRETURN(MATCH_NOMATCH);
|
||||||
if (rrc < 0) RRETURN(rrc);
|
if (rrc < 0) RRETURN(rrc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -943,13 +943,13 @@ are supported. */
|
||||||
else \
|
else \
|
||||||
pcre2_set_character_tables_32(G(a,32),b)
|
pcre2_set_character_tables_32(G(a,32),b)
|
||||||
|
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
pcre2_set_compile_recursion_guard_8(G(a,8),b); \
|
pcre2_set_compile_recursion_guard_8(G(a,8),b,c); \
|
||||||
else if (test_mode == PCRE16_MODE) \
|
else if (test_mode == PCRE16_MODE) \
|
||||||
pcre2_set_compile_recursion_guard_16(G(a,16),b); \
|
pcre2_set_compile_recursion_guard_16(G(a,16),b,c); \
|
||||||
else \
|
else \
|
||||||
pcre2_set_compile_recursion_guard_32(G(a,32),b)
|
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||||
|
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||||
if (test_mode == PCRE8_MODE) \
|
if (test_mode == PCRE8_MODE) \
|
||||||
|
@ -1315,11 +1315,11 @@ the three different cases. */
|
||||||
else \
|
else \
|
||||||
G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
|
G(pcre2_set_character_tables_,BITTWO)(G(a,BITTWO),b)
|
||||||
|
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b); \
|
G(pcre2_set_compile_recursion_guard_,BITONE)(G(a,BITONE),b,c); \
|
||||||
else \
|
else \
|
||||||
G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b)
|
G(pcre2_set_compile_recursion_guard_,BITTWO)(G(a,BITTWO),b,c)
|
||||||
|
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
#define PCRE2_SET_MATCH_LIMIT(a,b) \
|
||||||
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
if (test_mode == G(G(PCRE,BITONE),_MODE)) \
|
||||||
|
@ -1512,8 +1512,8 @@ the three different cases. */
|
||||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||||
pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
|
pcre2_set_callout_8(G(a,8),(int (*)(pcre2_callout_block_8 *, void *))b,c)
|
||||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
|
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_8(G(a,8),b)
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_8(G(a,8),b)
|
pcre2_set_compile_recursion_guard_8(G(a,8),b,c)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_8(G(a,8),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_8(G(a,8),b)
|
||||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
|
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_8(G(a,8),b)
|
||||||
|
@ -1593,8 +1593,8 @@ the three different cases. */
|
||||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||||
pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c);
|
pcre2_set_callout_16(G(a,16),(int (*)(pcre2_callout_block_16 *, void *))b,c);
|
||||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
|
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_16(G(a,16),b)
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_16(G(a,16),b)
|
pcre2_set_compile_recursion_guard_16(G(a,16),b,c)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_16(G(a,16),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_16(G(a,16),b)
|
||||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
|
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_16(G(a,16),b)
|
||||||
|
@ -1674,8 +1674,8 @@ the three different cases. */
|
||||||
#define PCRE2_SET_CALLOUT(a,b,c) \
|
#define PCRE2_SET_CALLOUT(a,b,c) \
|
||||||
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
|
pcre2_set_callout_32(G(a,32),(int (*)(pcre2_callout_block_32 *, void *))b,c);
|
||||||
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
#define PCRE2_SET_CHARACTER_TABLES(a,b) pcre2_set_character_tables_32(G(a,32),b)
|
||||||
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b) \
|
#define PCRE2_SET_COMPILE_RECURSION_GUARD(a,b,c) \
|
||||||
pcre2_set_compile_recursion_guard_32(G(a,32),b)
|
pcre2_set_compile_recursion_guard_32(G(a,32),b,c)
|
||||||
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
#define PCRE2_SET_MATCH_LIMIT(a,b) pcre2_set_match_limit_32(G(a,32),b)
|
||||||
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
#define PCRE2_SET_PARENS_NEST_LIMIT(a,b) pcre2_set_parens_nest_limit_32(G(a,32),b)
|
||||||
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
|
#define PCRE2_SET_RECURSION_LIMIT(a,b) pcre2_set_recursion_limit_32(G(a,32),b)
|
||||||
|
@ -2104,8 +2104,9 @@ Returns: non-zero to kill the compilation
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static int
|
static int
|
||||||
stack_guard(uint32_t depth)
|
stack_guard(uint32_t depth, void *user_data)
|
||||||
{
|
{
|
||||||
|
(void)user_data;
|
||||||
return depth > pat_patctl.stackguard_test;
|
return depth > pat_patctl.stackguard_test;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3827,7 +3828,7 @@ PCRE2_SET_CHARACTER_TABLES(pat_context, use_tables);
|
||||||
|
|
||||||
if (pat_patctl.stackguard_test != 0)
|
if (pat_patctl.stackguard_test != 0)
|
||||||
{
|
{
|
||||||
PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard);
|
PCRE2_SET_COMPILE_RECURSION_GUARD(pat_context, stack_guard, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Handle compiling via the POSIX interface, which doesn't support the
|
/* Handle compiling via the POSIX interface, which doesn't support the
|
||||||
|
@ -5686,13 +5687,13 @@ Returns: nothing
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
print_newline_config(unsigned int rc, BOOL isc)
|
print_newline_config(uint32_t optval, BOOL isc)
|
||||||
{
|
{
|
||||||
if (!isc) printf(" Newline sequence is ");
|
if (!isc) printf(" Newline sequence is ");
|
||||||
if (rc < sizeof(newlines)/sizeof(char *))
|
if (optval < sizeof(newlines)/sizeof(char *))
|
||||||
printf("%s\n", newlines[rc]);
|
printf("%s\n", newlines[optval]);
|
||||||
else
|
else
|
||||||
printf("a non-standard value: %d\n", rc);
|
printf("a non-standard value: %d\n", optval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -5769,8 +5770,7 @@ Returns: the return code
|
||||||
static int
|
static int
|
||||||
c_option(const char *arg)
|
c_option(const char *arg)
|
||||||
{
|
{
|
||||||
unsigned long int lrc;
|
uint32_t optval;
|
||||||
int rc;
|
|
||||||
int yield = 0;
|
int yield = 0;
|
||||||
|
|
||||||
if (arg != NULL)
|
if (arg != NULL)
|
||||||
|
@ -5789,8 +5789,8 @@ if (arg != NULL)
|
||||||
switch (coptlist[i].type)
|
switch (coptlist[i].type)
|
||||||
{
|
{
|
||||||
case CONF_BSR:
|
case CONF_BSR:
|
||||||
(void)PCRE2_CONFIG(coptlist[i].value, &rc);
|
(void)PCRE2_CONFIG(coptlist[i].value, &optval);
|
||||||
printf("%s\n", rc? "ANYCRLF" : "ANY");
|
printf("%s\n", optval? "ANYCRLF" : "ANY");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CONF_FIX:
|
case CONF_FIX:
|
||||||
|
@ -5799,8 +5799,8 @@ if (arg != NULL)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CONF_FIZ:
|
case CONF_FIZ:
|
||||||
rc = coptlist[i].value;
|
optval = coptlist[i].value;
|
||||||
printf("%d\n", rc);
|
printf("%d\n", optval);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CONF_INT:
|
case CONF_INT:
|
||||||
|
@ -5809,8 +5809,8 @@ if (arg != NULL)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case CONF_NL:
|
case CONF_NL:
|
||||||
(void)PCRE2_CONFIG(coptlist[i].value, &rc);
|
(void)PCRE2_CONFIG(coptlist[i].value, &optval);
|
||||||
print_newline_config(rc, TRUE);
|
print_newline_config(optval, TRUE);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5822,7 +5822,7 @@ if (arg != NULL)
|
||||||
char ucname[16];
|
char ucname[16];
|
||||||
strcpy(ucname, coptlist[i].name);
|
strcpy(ucname, coptlist[i].name);
|
||||||
for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
|
for (i = 0; ucname[i] != 0; i++) ucname[i] = toupper[ucname[i];
|
||||||
vms_setsymbol(ucname, 0, rc);
|
vms_setsymbol(ucname, 0, optval);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -5848,8 +5848,8 @@ printf(" 16-bit support\n");
|
||||||
printf(" 32-bit support\n");
|
printf(" 32-bit support\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, &optval);
|
||||||
if (rc != 0)
|
if (optval != 0)
|
||||||
{
|
{
|
||||||
printf(" UTF and UCP support (");
|
printf(" UTF and UCP support (");
|
||||||
print_unicode_version(stdout);
|
print_unicode_version(stdout);
|
||||||
|
@ -5857,8 +5857,8 @@ if (rc != 0)
|
||||||
}
|
}
|
||||||
else printf(" No Unicode support\n");
|
else printf(" No Unicode support\n");
|
||||||
|
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_JIT, &optval);
|
||||||
if (rc != 0)
|
if (optval != 0)
|
||||||
{
|
{
|
||||||
printf(" Just-in-time compiler support: ");
|
printf(" Just-in-time compiler support: ");
|
||||||
print_jit_target(stdout);
|
print_jit_target(stdout);
|
||||||
|
@ -5869,21 +5869,21 @@ else
|
||||||
printf(" No just-in-time compiler support\n");
|
printf(" No just-in-time compiler support\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_NEWLINE, &optval);
|
||||||
print_newline_config(rc, FALSE);
|
print_newline_config(optval, FALSE);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_BSR, &optval);
|
||||||
printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
|
printf(" \\R matches %s\n", optval? "CR, LF, or CRLF only" :
|
||||||
"all Unicode newlines");
|
"all Unicode newlines");
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_LINKSIZE, &optval);
|
||||||
printf(" Internal link size = %d\n", rc);
|
printf(" Internal link size = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &lrc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_PARENSLIMIT, &optval);
|
||||||
printf(" Parentheses nest limit = %ld\n", lrc);
|
printf(" Parentheses nest limit = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &lrc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, &optval);
|
||||||
printf(" Default match limit = %ld\n", lrc);
|
printf(" Default match limit = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &lrc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_RECURSIONLIMIT, &optval);
|
||||||
printf(" Default recursion depth limit = %ld\n", lrc);
|
printf(" Default recursion depth limit = %d\n", optval);
|
||||||
(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &rc);
|
(void)PCRE2_CONFIG(PCRE2_CONFIG_STACKRECURSE, &optval);
|
||||||
printf(" Match recursion uses %s", rc? "stack" : "heap");
|
printf(" Match recursion uses %s", optval? "stack" : "heap");
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue