Add additional compile options and PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
This commit is contained in:
parent
d9c33d0708
commit
dfc9712bcd
7
132html
7
132html
|
@ -109,8 +109,9 @@ while (<STDIN>)
|
||||||
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
# Handling .sp is subtle. If it is inside a literal section, do nothing if
|
||||||
# the next line is a non literal text line; similarly, if not inside a
|
# the next line is a non literal text line; similarly, if not inside a
|
||||||
# literal section, do nothing if a literal follows, unless we are inside
|
# literal section, do nothing if a literal follows, unless we are inside
|
||||||
# a .nf/.ne section. The point being that the <pre> and </pre> that delimit
|
# a .nf/.fi section or about to enter one. The point being that the <pre>
|
||||||
# literal sections will do the spacing. Always skip if no previous output.
|
# and </pre> that delimit literal sections will do the spacing. Always skip
|
||||||
|
# if no previous output.
|
||||||
|
|
||||||
elsif (/^\.sp/)
|
elsif (/^\.sp/)
|
||||||
{
|
{
|
||||||
|
@ -123,7 +124,7 @@ while (<STDIN>)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
print TEMP "<br>\n<br>\n" if ($innf || !/^[\s.]/);
|
print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
|
||||||
}
|
}
|
||||||
redo; # Now process the lookahead line we just read
|
redo; # Now process the lookahead line we just read
|
||||||
}
|
}
|
||||||
|
|
|
@ -166,6 +166,9 @@ pcre2test, a crash could occur.
|
||||||
32. Make -bigstack in RunTest allocate a 64 MB stack (instead of 16 MB) so that
|
32. Make -bigstack in RunTest allocate a 64 MB stack (instead of 16 MB) so that
|
||||||
all the tests can run with clang's sanitizing options.
|
all the tests can run with clang's sanitizing options.
|
||||||
|
|
||||||
|
33. Implement extra compile options in the compile context and add the first
|
||||||
|
one: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Version 10.23 14-February-2017
|
Version 10.23 14-February-2017
|
||||||
|
|
|
@ -67,6 +67,7 @@ dist_html_DATA = \
|
||||||
doc/html/pcre2_set_bsr.html \
|
doc/html/pcre2_set_bsr.html \
|
||||||
doc/html/pcre2_set_callout.html \
|
doc/html/pcre2_set_callout.html \
|
||||||
doc/html/pcre2_set_character_tables.html \
|
doc/html/pcre2_set_character_tables.html \
|
||||||
|
doc/html/pcre2_set_compile_extra_options.html \
|
||||||
doc/html/pcre2_set_compile_recursion_guard.html \
|
doc/html/pcre2_set_compile_recursion_guard.html \
|
||||||
doc/html/pcre2_set_depth_limit.html \
|
doc/html/pcre2_set_depth_limit.html \
|
||||||
doc/html/pcre2_set_heap_limit.html \
|
doc/html/pcre2_set_heap_limit.html \
|
||||||
|
@ -151,6 +152,7 @@ dist_man_MANS = \
|
||||||
doc/pcre2_set_bsr.3 \
|
doc/pcre2_set_bsr.3 \
|
||||||
doc/pcre2_set_callout.3 \
|
doc/pcre2_set_callout.3 \
|
||||||
doc/pcre2_set_character_tables.3 \
|
doc/pcre2_set_character_tables.3 \
|
||||||
|
doc/pcre2_set_compile_extra_options.3 \
|
||||||
doc/pcre2_set_compile_recursion_guard.3 \
|
doc/pcre2_set_compile_recursion_guard.3 \
|
||||||
doc/pcre2_set_depth_limit.3 \
|
doc/pcre2_set_depth_limit.3 \
|
||||||
doc/pcre2_set_heap_limit.3 \
|
doc/pcre2_set_heap_limit.3 \
|
||||||
|
|
2
RunTest
2
RunTest
|
@ -500,7 +500,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
||||||
for opt in "" $jitopt; do
|
for opt in "" $jitopt; do
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput2 testtry
|
||||||
if [ $? = 0 ] ; then
|
if [ $? = 0 ] ; then
|
||||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,188,189,190,191 >>testtry
|
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -65,-62,-2,-1,0,100,101,191,192 >>testtry
|
||||||
checkresult $? 2 "$opt"
|
checkresult $? 2 "$opt"
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
|
@ -207,6 +207,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
|
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
|
||||||
<td> Set character tables</td></tr>
|
<td> Set character tables</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_set_compile_extra_options.html">pcre2_set_compile_extra_options</a></td>
|
||||||
|
<td> Set compile time extra options</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
|
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
|
||||||
<td> Set up a compile recursion guard function</td></tr>
|
<td> Set up a compile recursion guard function</td></tr>
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,7 @@ system stack size checking, or to change one or more of these parameters:
|
||||||
The newline character sequence;
|
The newline character sequence;
|
||||||
The compile time nested parentheses limit;
|
The compile time nested parentheses limit;
|
||||||
The maximum pattern length (in code units) that is allowed.
|
The maximum pattern length (in code units) that is allowed.
|
||||||
|
The additional option bits
|
||||||
</pre>
|
</pre>
|
||||||
The option bits are:
|
The option bits are:
|
||||||
<pre>
|
<pre>
|
||||||
|
|
|
@ -0,0 +1,42 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>pcre2_set_compile_extra_options specification</title>
|
||||||
|
</head>
|
||||||
|
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
|
||||||
|
<h1>pcre2_set_compile_extra_options man page</h1>
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
||||||
|
<p>
|
||||||
|
This page is part of the PCRE2 HTML documentation. It was generated
|
||||||
|
automatically from the original man page. If there is any nonsense in it,
|
||||||
|
please consult the man page, in case the conversion went wrong.
|
||||||
|
<br>
|
||||||
|
<br><b>
|
||||||
|
SYNOPSIS
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
<b>#include &lt;pcre2.h&gt;</b>
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
|
<b> uint32_t <i>extra_options</i>);</b>
|
||||||
|
</P>
|
||||||
|
<br><b>
|
||||||
|
DESCRIPTION
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
This function sets additional option bits for <b>pcre2_compile()</b> that are
|
||||||
|
housed in a compile context. It completely replaces all the bits. The extra
|
||||||
|
options are:
|
||||||
|
<pre>
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
|
||||||
|
</pre>
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
|
page.
|
||||||
|
<p>
|
||||||
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
</p>
|
|
@ -60,8 +60,8 @@ please consult the man page, in case the conversion went wrong.
|
||||||
<b>#include &lt;pcre2.h&gt;</b>
|
<b>#include &lt;pcre2.h&gt;</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
PCRE2 is a new API for PCRE. This document contains a description of all its
|
PCRE2 is a new API for PCRE, starting at release 10.0. This document contains a
|
||||||
functions. See the
|
description of all its native functions. See the
|
||||||
<a href="pcre2.html"><b>pcre2</b></a>
|
<a href="pcre2.html"><b>pcre2</b></a>
|
||||||
document for an overview of all the PCRE2 documentation.
|
document for an overview of all the PCRE2 documentation.
|
||||||
</P>
|
</P>
|
||||||
|
@ -145,6 +145,10 @@ document for an overview of all the PCRE2 documentation.
|
||||||
<b> const unsigned char *<i>tables</i>);</b>
|
<b> const unsigned char *<i>tables</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
|
<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
|
<b> uint32_t <i>extra_options</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -328,7 +332,7 @@ document for an overview of all the PCRE2 documentation.
|
||||||
These functions became obsolete at release 10.30 and are retained only for
|
These functions became obsolete at release 10.30 and are retained only for
|
||||||
backward compatibility. They should not be used in new code. The first is
|
backward compatibility. They should not be used in new code. The first is
|
||||||
replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
|
replaced by <b>pcre2_set_depth_limit()</b>; the second is no longer needed and
|
||||||
no longer has any effect (it always returns zero).
|
has no effect (it always returns zero).
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
<br><a name="SEC12" href="#TOC1">PCRE2 8-BIT, 16-BIT, AND 32-BIT LIBRARIES</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -389,23 +393,23 @@ For example, if you want to run a match using a pattern that was compiled with
|
||||||
<P>
|
<P>
|
||||||
In the function summaries above, and in the rest of this document and other
|
In the function summaries above, and in the rest of this document and other
|
||||||
PCRE2 documents, functions and data types are described using their generic
|
PCRE2 documents, functions and data types are described using their generic
|
||||||
names, without the 8, 16, or 32 suffix.
|
names, without the _8, _16, or _32 suffix.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
<br><a name="SEC13" href="#TOC1">PCRE2 API OVERVIEW</a><br>
|
||||||
<P>
|
<P>
|
||||||
PCRE2 has its own native API, which is described in this document. There are
|
PCRE2 has its own native API, which is described in this document. There are
|
||||||
also some wrapper functions for the 8-bit library that correspond to the
|
also some wrapper functions for the 8-bit library that correspond to the
|
||||||
POSIX regular expression API, but they do not give access to all the
|
POSIX regular expression API, but they do not give access to all the
|
||||||
functionality. They are described in the
|
functionality of PCRE2. They are described in the
|
||||||
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
<a href="pcre2posix.html"><b>pcre2posix</b></a>
|
||||||
documentation. Both these APIs define a set of C function calls.
|
documentation. Both these APIs define a set of C function calls.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The native API C data types, function prototypes, option values, and error
|
The native API C data types, function prototypes, option values, and error
|
||||||
codes are defined in the header file <b>pcre2.h</b>, which contains definitions
|
codes are defined in the header file <b>pcre2.h</b>, which also contains
|
||||||
of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers for the
|
definitions of PCRE2_MAJOR and PCRE2_MINOR, the major and minor release numbers
|
||||||
library. Applications can use these to include support for different releases
|
for the library. Applications can use these to include support for different
|
||||||
of PCRE2.
|
releases of PCRE2.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
In a Windows environment, if you want to statically link an application program
|
In a Windows environment, if you want to statically link an application program
|
||||||
|
@ -478,7 +482,7 @@ been matched by <b>pcre2_match()</b>. They are:
|
||||||
<b>pcre2_substring_number_from_name()</b>
|
<b>pcre2_substring_number_from_name()</b>
|
||||||
</pre>
|
</pre>
|
||||||
<b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
|
<b>pcre2_substring_free()</b> and <b>pcre2_substring_list_free()</b> are also
|
||||||
provided, to free the memory used for extracted strings.
|
provided, to free memory used for extracted strings.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The function <b>pcre2_substitute()</b> can be called to match a pattern and
|
The function <b>pcre2_substitute()</b> can be called to match a pattern and
|
||||||
|
@ -595,7 +599,7 @@ required. JIT compilation updates a pointer within the compiled code block, so
|
||||||
a thread must gain unique write access to the pointer before calling
|
a thread must gain unique write access to the pointer before calling
|
||||||
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
<b>pcre2_jit_compile()</b>. Alternatively, <b>pcre2_code_copy()</b> or
|
||||||
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
<b>pcre2_code_copy_with_tables()</b> can be used to obtain a private copy of the
|
||||||
compiled code.
|
compiled code before calling the JIT compiler.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Context blocks
|
Context blocks
|
||||||
|
@ -649,6 +653,8 @@ library. The context is named `general' rather than specifically `memory'
|
||||||
because in future other fields may be added. If you do not want to supply your
|
because in future other fields may be added. If you do not want to supply your
|
||||||
own custom memory management functions, you do not need to bother with a
|
own custom memory management functions, you do not need to bother with a
|
||||||
general context. A general context is created by:
|
general context. A general context is created by:
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre2_general_context *pcre2_general_context_create(</b>
|
<b>pcre2_general_context *pcre2_general_context_create(</b>
|
||||||
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
<b> void *(*<i>private_malloc</i>)(PCRE2_SIZE, void *),</b>
|
||||||
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
<b> void (*<i>private_free</i>)(void *, void *), void *<i>memory_data</i>);</b>
|
||||||
|
@ -675,11 +681,15 @@ used. When the time comes to free the block, this function is called.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A general context can be copied by calling:
|
A general context can be copied by calling:
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre2_general_context *pcre2_general_context_copy(</b>
|
<b>pcre2_general_context *pcre2_general_context_copy(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
<br>
|
<br>
|
||||||
The memory used for a general context should be freed by calling:
|
The memory used for a general context should be freed by calling:
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
|
<b>void pcre2_general_context_free(pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<a name="compilecontext"></a></P>
|
<a name="compilecontext"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -695,6 +705,7 @@ following compile-time parameters:
|
||||||
The newline character sequence
|
The newline character sequence
|
||||||
The compile time nested parentheses limit
|
The compile time nested parentheses limit
|
||||||
The maximum length of the pattern string
|
The maximum length of the pattern string
|
||||||
|
The extra options bits (none set by default)
|
||||||
</pre>
|
</pre>
|
||||||
A compile context is also required if you are using custom memory management.
|
A compile context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
|
@ -702,6 +713,8 @@ If none of these apply, just pass NULL as the context argument of
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A compile context is created, copied, and freed by the following functions:
|
A compile context is created, copied, and freed by the following functions:
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre2_compile_context *pcre2_compile_context_create(</b>
|
<b>pcre2_compile_context *pcre2_compile_context_create(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -716,6 +729,8 @@ A compile context is created, copied, and freed by the following functions:
|
||||||
A compile context is created with default values for its parameters. These can
|
A compile context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_bsr(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -725,6 +740,8 @@ or CRLF, or PCRE2_BSR_UNICODE, to specify that \R matches any Unicode line
|
||||||
ending sequence. The value is used by the JIT compiler and by the two
|
ending sequence. The value is used by the JIT compiler and by the two
|
||||||
interpreted matching functions, <i>pcre2_match()</i> and
|
interpreted matching functions, <i>pcre2_match()</i> and
|
||||||
<i>pcre2_dfa_match()</i>.
|
<i>pcre2_dfa_match()</i>.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_character_tables(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> const unsigned char *<i>tables</i>);</b>
|
<b> const unsigned char *<i>tables</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -732,6 +749,22 @@ interpreted matching functions, <i>pcre2_match()</i> and
|
||||||
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
The value must be the result of a call to <i>pcre2_maketables()</i>, whose only
|
||||||
argument is a general context. This function builds a set of character tables
|
argument is a general context. This function builds a set of character tables
|
||||||
in the current locale.
|
in the current locale.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
<b>int pcre2_set_compile_extra_options(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
|
<b> uint32_t <i>extra_options</i>);</b>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
|
As PCRE2 has developed, almost all the 32 option bits that are available in
|
||||||
|
the <i>options</i> argument of <b>pcre2_compile()</b> have been used up. To avoid
|
||||||
|
running out, the compile context contains a set of extra option bits which are
|
||||||
|
used for some newer, assumed rarer, options. This function sets those bits. It
|
||||||
|
always sets all the bits (either on or off): it replaces, rather than modifies,
|
||||||
|
any existing setting. The available options are defined in the section entitled "Extra
|
||||||
|
compile options"
|
||||||
|
<a href="#extracompileoptions">below.</a>
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_max_pattern_length(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -741,6 +774,8 @@ compiled with this context. If the pattern is longer, an error is generated.
|
||||||
This facility is provided so that applications that accept patterns from
|
This facility is provided so that applications that accept patterns from
|
||||||
external sources can limit their size. The default is the largest number that a
|
external sources can limit their size. The default is the largest number that a
|
||||||
PCRE2_SIZE variable can hold, which is effectively unlimited.
|
PCRE2_SIZE variable can hold, which is effectively unlimited.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_newline(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -758,11 +793,13 @@ sequence such as (*CRLF). See the
|
||||||
page for details.
|
page for details.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
When a pattern is compiled with the PCRE2_EXTENDED option, the newline
|
When a pattern is compiled with the PCRE2_EXTENDED or PCRE2_EXTENDED_MORE
|
||||||
convention affects the recognition of white space and the end of internal
|
option, the newline convention affects the recognition of white space and the
|
||||||
comments starting with #. The value is saved with the compiled pattern for
|
end of internal comments starting with #. The value is saved with the compiled
|
||||||
subsequent use by the JIT compiler and by the two interpreted matching
|
pattern for subsequent use by the JIT compiler and by the two interpreted
|
||||||
functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
|
matching functions, <i>pcre2_match()</i> and <i>pcre2_dfa_match()</i>.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_parens_nest_limit(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -771,6 +808,8 @@ This parameter adjusts the limit, set when PCRE2 is built (default 250), on the
|
||||||
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
depth of parenthesis nesting in a pattern. This limit stops rogue patterns
|
||||||
using up too much system stack when being compiled. The limit applies to
|
using up too much system stack when being compiled. The limit applies to
|
||||||
parentheses of all kinds, not just capturing parentheses.
|
parentheses of all kinds, not just capturing parentheses.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
<b>int pcre2_set_compile_recursion_guard(pcre2_compile_context *<i>ccontext</i>,</b>
|
||||||
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
<b> int (*<i>guard_function</i>)(uint32_t, void *), void *<i>user_data</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -778,10 +817,10 @@ parentheses of all kinds, not just capturing parentheses.
|
||||||
There is at least one application that runs PCRE2 in threads with very limited
|
There is at least one application that runs PCRE2 in threads with very limited
|
||||||
system stack, where running out of stack is to be avoided at all costs. The
|
system stack, where running out of stack is to be avoided at all costs. The
|
||||||
parenthesis limit above cannot take account of how much stack is actually
|
parenthesis limit above cannot take account of how much stack is actually
|
||||||
available. For a finer control, you can supply a function that is called
|
available during compilation. For a finer control, you can supply a function
|
||||||
whenever <b>pcre2_compile()</b> starts to compile a parenthesized part of a
|
that is called whenever <b>pcre2_compile()</b> starts to compile a parenthesized
|
||||||
pattern. This function can check the actual stack size (or anything else that
|
part of a pattern. This function can check the actual stack size (or anything
|
||||||
it wants to, of course).
|
else that it wants to, of course).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The first argument to the callout function gives the current depth of
|
The first argument to the callout function gives the current depth of
|
||||||
|
@ -807,6 +846,8 @@ If none of these apply, just pass NULL as the context argument of
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A match context is created, copied, and freed by the following functions:
|
A match context is created, copied, and freed by the following functions:
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>pcre2_match_context *pcre2_match_context_create(</b>
|
<b>pcre2_match_context *pcre2_match_context_create(</b>
|
||||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -821,6 +862,8 @@ A match context is created, copied, and freed by the following functions:
|
||||||
A match context is created with default values for its parameters. These can
|
A match context is created with default values for its parameters. These can
|
||||||
be changed by calling the following functions, which return 0 on success, or
|
be changed by calling the following functions, which return 0 on success, or
|
||||||
PCRE2_ERROR_BADDATA if invalid data is detected.
|
PCRE2_ERROR_BADDATA if invalid data is detected.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_callout(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> int (*<i>callout_function</i>)(pcre2_callout_block *, void *),</b>
|
<b> int (*<i>callout_function</i>)(pcre2_callout_block *, void *),</b>
|
||||||
<b> void *<i>callout_data</i>);</b>
|
<b> void *<i>callout_data</i>);</b>
|
||||||
|
@ -830,6 +873,8 @@ This sets up a "callout" function for PCRE2 to call at specified points
|
||||||
during a matching operation. Details are given in the
|
during a matching operation. Details are given in the
|
||||||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||||
documentation.
|
documentation.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_offset_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> PCRE2_SIZE <i>value</i>);</b>
|
<b> PCRE2_SIZE <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -856,6 +901,8 @@ subject strings. See also the PCRE2_FIRSTLINE option, which requires a match to
|
||||||
start within the first line of the subject. If this is set with an offset
|
start within the first line of the subject. If this is set with an offset
|
||||||
limit, a match must occur in the first line and also within the offset limit.
|
limit, a match must occur in the first line and also within the offset limit.
|
||||||
In other words, whichever limit comes first is used.
|
In other words, whichever limit comes first is used.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_heap_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -889,6 +936,8 @@ Heap memory is used only if the initial vector is too small. If the heap limit
|
||||||
is set to a value less than 21 (in particular, zero) no heap memory will be
|
is set to a value less than 21 (in particular, zero) no heap memory will be
|
||||||
used. In this case, only patterns that do not have a lot of nested backtracking
|
used. In this case, only patterns that do not have a lot of nested backtracking
|
||||||
can be successfully processed.
|
can be successfully processed.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_match_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -926,6 +975,8 @@ of the form
|
||||||
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
where ddd is a decimal number. However, such a setting is ignored unless ddd is
|
||||||
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
less than the limit set by the caller of <b>pcre2_match()</b> or, if no such
|
||||||
limit is set, less than the default.
|
limit is set, less than the default.
|
||||||
|
<br>
|
||||||
|
<br>
|
||||||
<b>int pcre2_set_depth_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
<b>int pcre2_set_depth_limit(pcre2_match_context *<i>mcontext</i>,</b>
|
||||||
<b> uint32_t <i>value</i>);</b>
|
<b> uint32_t <i>value</i>);</b>
|
||||||
<br>
|
<br>
|
||||||
|
@ -1281,9 +1332,10 @@ parenthesis. The name is not processed in any way, and it is not possible to
|
||||||
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
include a closing parenthesis in the name. However, if the PCRE2_ALT_VERBNAMES
|
||||||
option is set, normal backslash processing is applied to verb names and only an
|
option is set, normal backslash processing is applied to verb names and only an
|
||||||
unescaped closing parenthesis terminates the name. A closing parenthesis can be
|
unescaped closing parenthesis terminates the name. A closing parenthesis can be
|
||||||
included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED
|
included in a name either as \) or between \Q and \E. If the PCRE2_EXTENDED
|
||||||
option is set, unescaped whitespace in verb names is skipped and #-comments are
|
or PCRE2_EXTENDED_MORE option is set, unescaped whitespace in verb names is
|
||||||
recognized in this mode, exactly as in the rest of the pattern.
|
skipped and #-comments are recognized in this mode, exactly as in the rest of
|
||||||
|
the pattern.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_AUTO_CALLOUT
|
PCRE2_AUTO_CALLOUT
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1298,7 +1350,13 @@ documentation.
|
||||||
</pre>
|
</pre>
|
||||||
If this bit is set, letters in the pattern match both upper and lower case
|
If this bit is set, letters in the pattern match both upper and lower case
|
||||||
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
letters in the subject. It is equivalent to Perl's /i option, and it can be
|
||||||
changed within a pattern by a (?i) option setting.
|
changed within a pattern by a (?i) option setting. If PCRE2_UTF is set, Unicode
|
||||||
|
properties are used for all characters with more than one other case, and for
|
||||||
|
all characters whose code points are greater than U+007f. For lower valued
|
||||||
|
characters with only one other case, a lookup table is used for speed. When
|
||||||
|
PCRE2_UTF is not set, a lookup table is used for all code points less than 256,
|
||||||
|
and higher code points (available only in 16-bit or 32-bit mode) are treated as
|
||||||
|
not having another case.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_DOLLAR_ENDONLY
|
PCRE2_DOLLAR_ENDONLY
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1380,18 +1438,18 @@ built.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_EXTENDED_MORE
|
PCRE2_EXTENDED_MORE
|
||||||
</pre>
|
</pre>
|
||||||
This option has the effect of PCRE2_EXTENDED, but, in addition, space and
|
This option has the effect of PCRE2_EXTENDED, but, in addition, unescaped space
|
||||||
horizontal tab characters are also ignored inside a character class.
|
and horizontal tab characters are ignored inside a character class.
|
||||||
PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx option, and it can be
|
PCRE2_EXTENDED_MORE is equivalent to Perl's 5.26 /xx option, and it can be
|
||||||
changed within a pattern by a (?xx) option setting.
|
changed within a pattern by a (?xx) option setting.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_FIRSTLINE
|
PCRE2_FIRSTLINE
|
||||||
</pre>
|
</pre>
|
||||||
If this option is set, an unanchored pattern is required to match before or at
|
If this option is set, the start of an unanchored pattern match must be before
|
||||||
the first newline in the subject string, though the matched text may continue
|
or at the first newline in the subject string, though the matched text may
|
||||||
over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a more
|
continue over the newline. See also PCRE2_USE_OFFSET_LIMIT, which provides a
|
||||||
general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit, a
|
more general limiting facility. If PCRE2_FIRSTLINE is set with an offset limit,
|
||||||
match must occur in the first line and also within the offset limit. In other
|
a match must occur in the first line and also within the offset limit. In other
|
||||||
words, whichever limit comes first is used.
|
words, whichever limit comes first is used.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_MATCH_UNSET_BACKREF
|
PCRE2_MATCH_UNSET_BACKREF
|
||||||
|
@ -1457,8 +1515,8 @@ PCRE2_NEVER_UTF causes an error.
|
||||||
If this option is set, it disables the use of numbered capturing parentheses in
|
If this option is set, it disables the use of numbered capturing parentheses in
|
||||||
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
|
||||||
were followed by ?: but named parentheses can still be used for capturing (and
|
were followed by ?: but named parentheses can still be used for capturing (and
|
||||||
they acquire numbers in the usual way). There is no equivalent of this option
|
they acquire numbers in the usual way). This is the same as Perl's /n option.
|
||||||
in Perl. Note that, if this option is set, references to capturing groups (back
|
Note that, when this option is set, references to capturing groups (back
|
||||||
references or recursion/subroutine calls) may only refer to named groups,
|
references or recursion/subroutine calls) may only refer to named groups,
|
||||||
though the reference can be by name or by number.
|
though the reference can be by name or by number.
|
||||||
<pre>
|
<pre>
|
||||||
|
@ -1494,8 +1552,8 @@ compiler.
|
||||||
<P>
|
<P>
|
||||||
There are a number of optimizations that may occur at the start of a match, in
|
There are a number of optimizations that may occur at the start of a match, in
|
||||||
order to speed up the process. For example, if it is known that an unanchored
|
order to speed up the process. For example, if it is known that an unanchored
|
||||||
match must start with a specific character, the matching code searches the
|
match must start with a specific code unit value, the matching code searches
|
||||||
subject for that character, and fails immediately if it cannot find it, without
|
the subject for that value, and fails immediately if it cannot find it, without
|
||||||
actually running the main matching function. This means that a special item
|
actually running the main matching function. This means that a special item
|
||||||
such as (*COMMIT) at the start of a pattern is not considered until after a
|
such as (*COMMIT) at the start of a pattern is not considered until after a
|
||||||
suitable starting point for the match has been found. Also, when callouts or
|
suitable starting point for the match has been found. Also, when callouts or
|
||||||
|
@ -1524,9 +1582,11 @@ current starting position, which in this case, it does. However, if the same
|
||||||
match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
|
match is run with PCRE2_NO_START_OPTIMIZE set, the initial scan along the
|
||||||
subject string does not happen. The first match attempt is run starting from
|
subject string does not happen. The first match attempt is run starting from
|
||||||
"D" and when this fails, (*COMMIT) prevents any further matches being tried, so
|
"D" and when this fails, (*COMMIT) prevents any further matches being tried, so
|
||||||
the overall result is "no match". There are also other start-up optimizations.
|
the overall result is "no match".
|
||||||
For example, a minimum length for the subject may be recorded. Consider the
|
</P>
|
||||||
pattern
|
<P>
|
||||||
|
There are also other start-up optimizations. For example, a minimum length for
|
||||||
|
the subject may be recorded. Consider the pattern
|
||||||
<pre>
|
<pre>
|
||||||
(*MARK:A)(X|Y)
|
(*MARK:A)(X|Y)
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1548,15 +1608,29 @@ and
|
||||||
in the
|
in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
document. If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a
|
document. If an invalid UTF sequence is found, <b>pcre2_compile()</b> returns a
|
||||||
negative error code.
|
negative error code.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you know that your pattern is valid, and you want to skip this check for
|
If you know that your pattern is a valid UTF string, and you want to skip this
|
||||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it is set,
|
check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When
|
||||||
the effect of passing an invalid UTF string as a pattern is undefined. It may
|
it is set, the effect of passing an invalid UTF string as a pattern is
|
||||||
cause your program to crash or loop. Note that this option can also be passed
|
undefined. It may cause your program to crash or loop.
|
||||||
to <b>pcre2_match()</b> and <b>pcre_dfa_match()</b>, to suppress validity
|
</P>
|
||||||
checking of the subject string.
|
<P>
|
||||||
|
Note that this option can also be passed to <b>pcre2_match()</b> and
|
||||||
|
<b>pcre2_dfa_match()</b>, to suppress UTF validity checking of the subject
|
||||||
|
string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
|
||||||
|
error that is given if an escape sequence for an invalid Unicode code point is
|
||||||
|
encountered in the pattern. In particular, the so-called "surrogate" code
|
||||||
|
points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences
|
||||||
|
such as \x{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
|
||||||
|
option, as described in the section entitled "Extra compile options"
|
||||||
|
<a href="#extracompileoptions">below.</a>
|
||||||
|
However, this is possible only in UTF-8 and UTF-32 modes, because these values
|
||||||
|
are not representable in UTF-16.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_UCP
|
PCRE2_UCP
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1594,10 +1668,42 @@ This option causes PCRE2 to regard both the pattern and the subject strings
|
||||||
that are subsequently processed as strings of UTF characters instead of
|
that are subsequently processed as strings of UTF characters instead of
|
||||||
single-code-unit strings. It is available when PCRE2 is built to include
|
single-code-unit strings. It is available when PCRE2 is built to include
|
||||||
Unicode support (which is the default). If Unicode support is not available,
|
Unicode support (which is the default). If Unicode support is not available,
|
||||||
the use of this option provokes an error. Details of how this option changes
|
the use of this option provokes an error. Details of how PCRE2_UTF changes the
|
||||||
the behaviour of PCRE2 are given in the
|
behaviour of PCRE2 are given in the
|
||||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||||
page.
|
page.
|
||||||
|
<a name="extracompileoptions"></a></P>
|
||||||
|
<br><b>
|
||||||
|
Extra compile options
|
||||||
|
</b><br>
|
||||||
|
<P>
|
||||||
|
Unlike the main compile-time options, the extra options are not saved with the
|
||||||
|
compiled pattern. The option bits that can be set in a compile context by
|
||||||
|
calling the <b>pcre2_set_compile_extra_options()</b> function are as follows:
|
||||||
|
<pre>
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
|
||||||
|
</pre>
|
||||||
|
This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is
|
||||||
|
forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate"
|
||||||
|
code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode
|
||||||
|
code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot
|
||||||
|
therefore be represented in UTF-16. They can be represented in UTF-8 and
|
||||||
|
UTF-32, but are defined as invalid code points, and cause errors if encountered
|
||||||
|
in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
These values also cause errors if encountered in escape sequences such as
|
||||||
|
\x{d912} within a pattern. However, it seems that some applications, when
|
||||||
|
using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test
|
||||||
|
for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does
|
||||||
|
not disable the error that occurs, because it applies only to the testing of
|
||||||
|
input strings for UTF validity.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code
|
||||||
|
point values in UTF-8 and UTF-32 patterns no longer provoke errors and are
|
||||||
|
incorporated in the compiled pattern. However, they can only match subject
|
||||||
|
characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
<br><a name="SEC20" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -1806,7 +1912,9 @@ The third argument should point to an <b>uint32_t</b> variable.
|
||||||
If the pattern set a backtracking depth limit by including an item of the form
|
If the pattern set a backtracking depth limit by including an item of the form
|
||||||
(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument
|
(*LIMIT_DEPTH=nnnn) at the start, the value is returned. The third argument
|
||||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
|
||||||
|
that this limit will only be used during matching if it is less than the limit
|
||||||
|
set or defaulted by the caller of the match function.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_FIRSTBITMAP
|
PCRE2_INFO_FIRSTBITMAP
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1824,15 +1932,15 @@ returned. Otherwise NULL is returned. The third argument should point to an
|
||||||
Return information about the first code unit of any matched string, for a
|
Return information about the first code unit of any matched string, for a
|
||||||
non-anchored pattern. The third argument should point to an <b>uint32_t</b>
|
non-anchored pattern. The third argument should point to an <b>uint32_t</b>
|
||||||
variable. If there is a fixed first value, for example, the letter "c" from a
|
variable. If there is a fixed first value, for example, the letter "c" from a
|
||||||
pattern such as (cat|cow|coyote), 1 is returned, and the character value can be
|
pattern such as (cat|cow|coyote), 1 is returned, and the value can be retrieved
|
||||||
retrieved using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but
|
using PCRE2_INFO_FIRSTCODEUNIT. If there is no fixed first value, but it is
|
||||||
it is known that a match can occur only at the start of the subject or
|
known that a match can occur only at the start of the subject or following a
|
||||||
following a newline in the subject, 2 is returned. Otherwise, and for anchored
|
newline in the subject, 2 is returned. Otherwise, and for anchored patterns, 0
|
||||||
patterns, 0 is returned.
|
is returned.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_FIRSTCODEUNIT
|
PCRE2_INFO_FIRSTCODEUNIT
|
||||||
</pre>
|
</pre>
|
||||||
Return the value of the first code unit of any matched string in the situation
|
Return the value of the first code unit of any matched string for a pattern
|
||||||
where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third
|
where PCRE2_INFO_FIRSTCODETYPE returns 1; otherwise return 0. The third
|
||||||
argument should point to an <b>uint32_t</b> variable. In the 8-bit library, the
|
argument should point to an <b>uint32_t</b> variable. In the 8-bit library, the
|
||||||
value is always less than 256. In the 16-bit library the value can be up to
|
value is always less than 256. In the 16-bit library the value can be up to
|
||||||
|
@ -1864,7 +1972,9 @@ the equivalent hexadecimal or octal escape sequences.
|
||||||
If the pattern set a heap memory limit by including an item of the form
|
If the pattern set a heap memory limit by including an item of the form
|
||||||
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
(*LIMIT_HEAP=nnnn) at the start, the value is returned. The third argument
|
||||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
|
||||||
|
that this limit will only be used during matching if it is less than the limit
|
||||||
|
set or defaulted by the caller of the match function.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_JCHANGED
|
PCRE2_INFO_JCHANGED
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1891,10 +2001,10 @@ PCRE2_INFO_LASTCODEUNIT), but for /^a\dz\d/ the returned value is 0.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_LASTCODEUNIT
|
PCRE2_INFO_LASTCODEUNIT
|
||||||
</pre>
|
</pre>
|
||||||
Return the value of the rightmost literal data unit that must exist in any
|
Return the value of the rightmost literal code unit that must exist in any
|
||||||
matched string, other than at its start, if such a value has been recorded. The
|
matched string, other than at its start, for a pattern where
|
||||||
third argument should point to an <b>uint32_t</b> variable. If there is no such
|
PCRE2_INFO_LASTCODETYPE returns 1. Otherwise, return 0. The third argument
|
||||||
value, 0 is returned.
|
should point to an <b>uint32_t</b> variable.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_MATCHEMPTY
|
PCRE2_INFO_MATCHEMPTY
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1909,7 +2019,9 @@ in such cases.
|
||||||
If the pattern set a match limit by including an item of the form
|
If the pattern set a match limit by including an item of the form
|
||||||
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
|
(*LIMIT_MATCH=nnnn) at the start, the value is returned. The third argument
|
||||||
should point to an unsigned 32-bit integer. If no such value has been set, the
|
should point to an unsigned 32-bit integer. If no such value has been set, the
|
||||||
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET.
|
call to <b>pcre2_pattern_info()</b> returns the error PCRE2_ERROR_UNSET. Note
|
||||||
|
that this limit will only be used during matching if it is less than the limit
|
||||||
|
set or defaulted by the caller of the match function.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_MAXLOOKBEHIND
|
PCRE2_INFO_MAXLOOKBEHIND
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -1921,7 +2033,8 @@ require a one-character lookbehind. \A also registers a one-character
|
||||||
lookbehind, though it does not actually inspect the previous character. This is
|
lookbehind, though it does not actually inspect the previous character. This is
|
||||||
to ensure that at least one character from the old segment is retained when a
|
to ensure that at least one character from the old segment is retained when a
|
||||||
new segment is processed. Otherwise, if there are no lookbehinds in the
|
new segment is processed. Otherwise, if there are no lookbehinds in the
|
||||||
pattern, \A might match incorrectly at the start of a new segment.
|
pattern, \A might match incorrectly at the start of a second or subsequent
|
||||||
|
segment.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_INFO_MINLENGTH
|
PCRE2_INFO_MINLENGTH
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2216,7 +2329,7 @@ character is CR followed by LF, advance the starting offset by two characters
|
||||||
instead of one.
|
instead of one.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If a non-zero starting offset is passed when the pattern is anchored, an single
|
If a non-zero starting offset is passed when the pattern is anchored, a single
|
||||||
attempt to match at the given offset is made. This can only succeed if the
|
attempt to match at the given offset is made. This can only succeed if the
|
||||||
pattern does not require the match to be at the start of the subject. In other
|
pattern does not require the match to be at the start of the subject. In other
|
||||||
words, the anchoring must be the result of setting the PCRE2_ANCHORED option or
|
words, the anchoring must be the result of setting the PCRE2_ANCHORED option or
|
||||||
|
@ -2611,6 +2724,10 @@ documentation for details.
|
||||||
PCRE2_ERROR_DEPTHLIMIT
|
PCRE2_ERROR_DEPTHLIMIT
|
||||||
</pre>
|
</pre>
|
||||||
The nested backtracking depth limit was reached.
|
The nested backtracking depth limit was reached.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ERROR_HEAPLIMIT
|
||||||
|
</pre>
|
||||||
|
The heap limit was reached.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_INTERNAL
|
PCRE2_ERROR_INTERNAL
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -3290,7 +3407,7 @@ NOTE: PCRE2's "auto-possessification" optimization usually applies to character
|
||||||
repeats at the end of a pattern (as well as internally). For example, the
|
repeats at the end of a pattern (as well as internally). For example, the
|
||||||
pattern "a\d+" is compiled as if it were "a\d++". For DFA matching, this
|
pattern "a\d+" is compiled as if it were "a\d++". For DFA matching, this
|
||||||
means that only one possible match is found. If you really do want multiple
|
means that only one possible match is found. If you really do want multiple
|
||||||
matches in such cases, either use an ungreedy repeat auch as "a\d+?" or set
|
matches in such cases, either use an ungreedy repeat such as "a\d+?" or set
|
||||||
the PCRE2_NO_AUTO_POSSESS option when compiling.
|
the PCRE2_NO_AUTO_POSSESS option when compiling.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
|
@ -3351,7 +3468,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 April 2017
|
Last updated: 17 May 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -1545,12 +1545,13 @@ alternative in the subpattern.
|
||||||
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
<br><a name="SEC13" href="#TOC1">INTERNAL OPTION SETTING</a><br>
|
||||||
<P>
|
<P>
|
||||||
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
|
The settings of the PCRE2_CASELESS, PCRE2_MULTILINE, PCRE2_DOTALL,
|
||||||
PCRE2_EXTENDED, and PCRE2_EXTENDED_MORE options (which are Perl-compatible) can
|
PCRE2_EXTENDED, PCRE2_EXTENDED_MORE, and PCRE2_NO_AUTO_CAPTURE options (which
|
||||||
be changed from within the pattern by a sequence of Perl option letters
|
are Perl-compatible) can be changed from within the pattern by a sequence of
|
||||||
enclosed between "(?" and ")". The option letters are
|
Perl option letters enclosed between "(?" and ")". The option letters are
|
||||||
<pre>
|
<pre>
|
||||||
i for PCRE2_CASELESS
|
i for PCRE2_CASELESS
|
||||||
m for PCRE2_MULTILINE
|
m for PCRE2_MULTILINE
|
||||||
|
n for PCRE2_NO_AUTO_CAPTURE
|
||||||
s for PCRE2_DOTALL
|
s for PCRE2_DOTALL
|
||||||
x for PCRE2_EXTENDED
|
x for PCRE2_EXTENDED
|
||||||
xx for PCRE2_EXTENDED_MORE
|
xx for PCRE2_EXTENDED_MORE
|
||||||
|
|
|
@ -430,6 +430,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
|
||||||
(?i) caseless
|
(?i) caseless
|
||||||
(?J) allow duplicate names
|
(?J) allow duplicate names
|
||||||
(?m) multiline
|
(?m) multiline
|
||||||
|
(?n) no auto capture
|
||||||
(?s) single line (dotall)
|
(?s) single line (dotall)
|
||||||
(?U) default ungreedy (lazy)
|
(?U) default ungreedy (lazy)
|
||||||
(?x) extended: ignore white space except in classes
|
(?x) extended: ignore white space except in classes
|
||||||
|
|
|
@ -559,14 +559,19 @@ by a previous <b>#pattern</b> command.
|
||||||
Setting compilation options
|
Setting compilation options
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The following modifiers set options for <b>pcre2_compile()</b>. The most common
|
The following modifiers set options for <b>pcre2_compile()</b>. Most of them set
|
||||||
ones have single-letter abbreviations, with special handling for /x (to make
|
bits in the options argument of that function, but those whose names start with
|
||||||
it like Perl). If a second x is present, PCRE2_EXTENDED is converted into
|
PCRE2_EXTRA are additional options that are set in the compile context. For the
|
||||||
PCRE2_EXTENDED_MORE. A third appearance adds PCRE2_EXTENDED as well. See
|
main options, there are some single-letter abbreviations that are the same as
|
||||||
|
Perl options. There is special handling for /x: if a second x is present,
|
||||||
|
PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
|
||||||
|
appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
|
||||||
|
way <b>pcre2_compile()</b> behaves. See
|
||||||
<a href="pcre2api.html"><b>pcre2api</b></a>
|
<a href="pcre2api.html"><b>pcre2api</b></a>
|
||||||
for a description of the effects of these options.
|
for a description of the effects of these options.
|
||||||
<pre>
|
<pre>
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
|
allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
|
||||||
alt_bsux set PCRE2_ALT_BSUX
|
alt_bsux set PCRE2_ALT_BSUX
|
||||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||||
alt_verbnames set PCRE2_ALT_VERBNAMES
|
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||||
|
@ -585,7 +590,7 @@ for a description of the effects of these options.
|
||||||
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
|
never_backslash_c set PCRE2_NEVER_BACKSLASH_C
|
||||||
never_ucp set PCRE2_NEVER_UCP
|
never_ucp set PCRE2_NEVER_UCP
|
||||||
never_utf set PCRE2_NEVER_UTF
|
never_utf set PCRE2_NEVER_UTF
|
||||||
no_auto_capture set PCRE2_NO_AUTO_CAPTURE
|
/n no_auto_capture set PCRE2_NO_AUTO_CAPTURE
|
||||||
no_auto_possess set PCRE2_NO_AUTO_POSSESS
|
no_auto_possess set PCRE2_NO_AUTO_POSSESS
|
||||||
no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR
|
no_dotstar_anchor set PCRE2_NO_DOTSTAR_ANCHOR
|
||||||
no_start_optimize set PCRE2_NO_START_OPTIMIZE
|
no_start_optimize set PCRE2_NO_START_OPTIMIZE
|
||||||
|
@ -607,7 +612,8 @@ Setting compilation controls
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
The following modifiers affect the compilation process or request information
|
The following modifiers affect the compilation process or request information
|
||||||
about the pattern:
|
about the pattern. There are single-letter abbreviations for some that are
|
||||||
|
heavily used in the test files.
|
||||||
<pre>
|
<pre>
|
||||||
bsr=[anycrlf|unicode] specify \R handling
|
bsr=[anycrlf|unicode] specify \R handling
|
||||||
/B bincode show binary code without lengths
|
/B bincode show binary code without lengths
|
||||||
|
@ -1810,7 +1816,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 17 April 2017
|
Last updated: 17 May 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2017 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
|
@ -47,7 +47,7 @@ and
|
||||||
documentation. Only the short names for properties are supported. For example,
|
documentation. Only the short names for properties are supported. For example,
|
||||||
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
|
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
|
||||||
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
|
||||||
compatibility with Perl 5.6. PCRE does not support this.
|
compatibility with Perl 5.6. PCRE2 does not support this.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
WIDE CHARACTERS AND UTF MODES
|
WIDE CHARACTERS AND UTF MODES
|
||||||
|
@ -109,10 +109,15 @@ However, the special horizontal and vertical white space matching escapes (\h,
|
||||||
\H, \v, and \V) do match all the appropriate Unicode characters, whether or
|
\H, \v, and \V) do match all the appropriate Unicode characters, whether or
|
||||||
not PCRE2_UCP is set.
|
not PCRE2_UCP is set.
|
||||||
</P>
|
</P>
|
||||||
|
<br><b>
|
||||||
|
CASE-EQUIVALENCE IN UTF MODES
|
||||||
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Case-insensitive matching in UTF mode makes use of Unicode properties. A few
|
Case-insensitive matching in a UTF mode makes use of Unicode properties except
|
||||||
Unicode characters such as Greek sigma have more than two codepoints that are
|
for characters whose code points are less than 128 and that have at most two
|
||||||
case-equivalent, and these are treated as such.
|
case-equivalent values. For these, a direct table lookup is used for speed. A
|
||||||
|
few Unicode characters such as Greek sigma have more than two codepoints that
|
||||||
|
are case-equivalent, and these are treated as such.
|
||||||
</P>
|
</P>
|
||||||
<br><b>
|
<br><b>
|
||||||
VALIDITY OF UTF STRINGS
|
VALIDITY OF UTF STRINGS
|
||||||
|
@ -173,6 +178,15 @@ or <b>pcre2_dfa_match()</b>.
|
||||||
<P>
|
<P>
|
||||||
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
||||||
is undefined and your program may crash or loop indefinitely.
|
is undefined and your program may crash or loop indefinitely.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
|
||||||
|
that is given if an escape sequence for an invalid Unicode code point is
|
||||||
|
encountered in the pattern. If you want to allow escape sequences such as
|
||||||
|
\x{d800} (a surrogate code point) you can set the
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
|
||||||
|
only in UTF-8 and UTF-32 modes, because these values are not representable in
|
||||||
|
UTF-16.
|
||||||
<a name="utf8strings"></a></P>
|
<a name="utf8strings"></a></P>
|
||||||
<br><b>
|
<br><b>
|
||||||
Errors in UTF-8 strings
|
Errors in UTF-8 strings
|
||||||
|
@ -280,9 +294,9 @@ Cambridge, England.
|
||||||
REVISION
|
REVISION
|
||||||
</b><br>
|
</b><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 03 July 2016
|
Last updated: 17 May 2017
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2016 University of Cambridge.
|
Copyright © 1997-2017 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
<p>
|
<p>
|
||||||
Return to the <a href="index.html">PCRE2 index page</a>.
|
Return to the <a href="index.html">PCRE2 index page</a>.
|
||||||
|
|
|
@ -207,6 +207,9 @@ in the library.
|
||||||
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
|
<tr><td><a href="pcre2_set_character_tables.html">pcre2_set_character_tables</a></td>
|
||||||
<td> Set character tables</td></tr>
|
<td> Set character tables</td></tr>
|
||||||
|
|
||||||
|
<tr><td><a href="pcre2_set_compile_extra_options.html">pcre2_set_compile_extra_options</a></td>
|
||||||
|
<td> Set compile time extra options</td></tr>
|
||||||
|
|
||||||
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
|
<tr><td><a href="pcre2_set_compile_recursion_guard.html">pcre2_set_compile_recursion_guard</a></td>
|
||||||
<td> Set up a compile recursion guard function</td></tr>
|
<td> Set up a compile recursion guard function</td></tr>
|
||||||
|
|
||||||
|
|
3689
doc/pcre2.txt
3689
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2_COMPILE 3 "04 April 2017" "PCRE2 10.30"
|
.TH PCRE2_COMPILE 3 "17 May 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -35,6 +35,7 @@ system stack size checking, or to change one or more of these parameters:
|
||||||
The newline character sequence;
|
The newline character sequence;
|
||||||
The compile time nested parentheses limit;
|
The compile time nested parentheses limit;
|
||||||
The maximum pattern length (in code units) that is allowed.
|
The maximum pattern length (in code units) that is allowed.
|
||||||
|
The additional option bits.
|
||||||
.sp
|
.sp
|
||||||
The option bits are:
|
The option bits are:
|
||||||
.sp
|
.sp
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
.TH PCRE2_SET_COMPILE_EXTRA_OPTIONS 3 "17 May 2017" "PCRE2 10.30"
|
||||||
|
.SH NAME
|
||||||
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
.B #include <pcre2.h>
|
||||||
|
.PP
|
||||||
|
.nf
|
||||||
|
.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
|
||||||
|
.B " uint32_t \fIextra_options\fP);"
|
||||||
|
.fi
|
||||||
|
.
|
||||||
|
.SH DESCRIPTION
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
This function sets additional option bits for \fBpcre2_compile()\fP that are
|
||||||
|
housed in a compile context. It completely replaces all the bits. The extra
|
||||||
|
options are:
|
||||||
|
.sp
|
||||||
|
.\" JOIN
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff}
|
||||||
|
in UTF-8 and UTF-32 modes
|
||||||
|
.sp
|
||||||
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2api\fP
|
||||||
|
.\"
|
||||||
|
page and a description of the POSIX API in the
|
||||||
|
.\" HREF
|
||||||
|
\fBpcre2posix\fP
|
||||||
|
.\"
|
||||||
|
page.
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2API 3 "20 April 2017" "PCRE2 10.30"
|
.TH PCRE2API 3 "17 May 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.sp
|
.sp
|
||||||
|
@ -90,6 +90,9 @@ document for an overview of all the PCRE2 documentation.
|
||||||
.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_character_tables(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " const unsigned char *\fItables\fP);"
|
.B " const unsigned char *\fItables\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
|
||||||
|
.B " uint32_t \fIextra_options\fP);"
|
||||||
|
.sp
|
||||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||||
.sp
|
.sp
|
||||||
|
@ -643,6 +646,7 @@ following compile-time parameters:
|
||||||
The newline character sequence
|
The newline character sequence
|
||||||
The compile time nested parentheses limit
|
The compile time nested parentheses limit
|
||||||
The maximum length of the pattern string
|
The maximum length of the pattern string
|
||||||
|
The extra options bits (none set by default)
|
||||||
.sp
|
.sp
|
||||||
A compile context is also required if you are using custom memory management.
|
A compile context is also required if you are using custom memory management.
|
||||||
If none of these apply, just pass NULL as the context argument of
|
If none of these apply, just pass NULL as the context argument of
|
||||||
|
@ -685,6 +689,23 @@ argument is a general context. This function builds a set of character tables
|
||||||
in the current locale.
|
in the current locale.
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
|
.B int pcre2_set_compile_extra_options(pcre2_compile_context *\fIccontext\fP,
|
||||||
|
.B " uint32_t \fIextra_options\fP);"
|
||||||
|
.fi
|
||||||
|
.sp
|
||||||
|
As PCRE2 has developed, almost all the 32 option bits that are available in
|
||||||
|
the \fIoptions\fP argument of \fBpcre2_compile()\fP have been used up. To avoid
|
||||||
|
running out, the compile context contains a set of extra option bits which are
|
||||||
|
used for some newer, assumed rarer, options. This function sets those bits. It
|
||||||
|
always sets all the bits (either on or off), replacing rather than adding to any
|
||||||
|
existing setting. The available options are defined in the section entitled "Extra
|
||||||
|
compile options"
|
||||||
|
.\" HTML <a href="#extracompileoptions">
|
||||||
|
.\" </a>
|
||||||
|
below.
|
||||||
|
.\"
|
||||||
|
.sp
|
||||||
|
.nf
|
||||||
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
.B int pcre2_set_max_pattern_length(pcre2_compile_context *\fIccontext\fP,
|
||||||
.B " PCRE2_SIZE \fIvalue\fP);"
|
.B " PCRE2_SIZE \fIvalue\fP);"
|
||||||
.fi
|
.fi
|
||||||
|
@ -1533,14 +1554,29 @@ in the
|
||||||
\fBpcre2unicode\fP
|
\fBpcre2unicode\fP
|
||||||
.\"
|
.\"
|
||||||
document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a
|
document. If an invalid UTF sequence is found, \fBpcre2_compile()\fP returns a
|
||||||
negative error code.
|
negative error code.
|
||||||
.P
|
.P
|
||||||
If you know that your pattern is valid, and you want to skip this check for
|
If you know that your pattern is a valid UTF string, and you want to skip this
|
||||||
performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When it is set,
|
check for performance reasons, you can set the PCRE2_NO_UTF_CHECK option. When
|
||||||
the effect of passing an invalid UTF string as a pattern is undefined. It may
|
it is set, the effect of passing an invalid UTF string as a pattern is
|
||||||
cause your program to crash or loop. Note that this option can also be passed
|
undefined. It may cause your program to crash or loop.
|
||||||
to \fBpcre2_match()\fP and \fBpcre_dfa_match()\fP, to suppress validity
|
.P
|
||||||
checking of the subject string.
|
Note that this option can also be passed to \fBpcre2_match()\fP and
|
||||||
|
\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject
|
||||||
|
string.
|
||||||
|
.P
|
||||||
|
Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the
|
||||||
|
error that is given if an escape sequence for an invalid Unicode code point is
|
||||||
|
encountered in the pattern. In particular, the so-called "surrogate" code
|
||||||
|
points (0xd800 to 0xdfff) are invalid. If you want to allow escape sequences
|
||||||
|
such as \ex{d800} you can set the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra
|
||||||
|
option, as described in the section entitled "Extra compile options"
|
||||||
|
.\" HTML <a href="#extracompileoptions">
|
||||||
|
.\" </a>
|
||||||
|
below.
|
||||||
|
.\"
|
||||||
|
However, this is possible only in UTF-8 and UTF-32 modes, because these values
|
||||||
|
are not representable in UTF-16.
|
||||||
.sp
|
.sp
|
||||||
PCRE2_UCP
|
PCRE2_UCP
|
||||||
.sp
|
.sp
|
||||||
|
@ -1594,6 +1630,37 @@ behaviour of PCRE2 are given in the
|
||||||
page.
|
page.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
|
.\" HTML <a name="extracompileoptions"></a>
|
||||||
|
.SS "Extra compile options"
|
||||||
|
.rs
|
||||||
|
.sp
|
||||||
|
Unlike the main compile-time options, the extra options are not saved with the
|
||||||
|
compiled pattern. The option bits that can be set in a compile context by
|
||||||
|
calling the \fBpcre2_set_compile_extra_options()\fP function are as follows:
|
||||||
|
.sp
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
|
||||||
|
.sp
|
||||||
|
This option applies when compiling a pattern in UTF-8 or UTF-32 mode. It is
|
||||||
|
forbidden in UTF-16 mode, and ignored in non-UTF modes. Unicode "surrogate"
|
||||||
|
code points in the range 0xd800 to 0xdfff are used in pairs in UTF-16 to encode
|
||||||
|
code points with values in the range 0x10000 to 0x10ffff. The surrogates cannot
|
||||||
|
therefore be represented in UTF-16. They can be represented in UTF-8 and
|
||||||
|
UTF-32, but are defined as invalid code points, and cause errors if encountered
|
||||||
|
in a UTF-8 or UTF-32 string that is being checked for validity by PCRE2.
|
||||||
|
.P
|
||||||
|
These values also cause errors if encountered in escape sequences such as
|
||||||
|
\ex{d912} within a pattern. However, it seems that some applications, when
|
||||||
|
using PCRE2 to check for unwanted characters in UTF-8 strings, explicitly test
|
||||||
|
for the surrogates using escape sequences. The PCRE2_NO_UTF_CHECK option does
|
||||||
|
not disable the error that occurs, because it applies only to the testing of
|
||||||
|
input strings for UTF validity.
|
||||||
|
.P
|
||||||
|
If the extra option PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is set, surrogate code
|
||||||
|
point values in UTF-8 and UTF-32 patterns no longer provoke errors and are
|
||||||
|
incorporated in the compiled pattern. However, they can only match subject
|
||||||
|
characters if the matching function is called with PCRE2_NO_UTF_CHECK set.
|
||||||
|
.
|
||||||
|
.
|
||||||
.SH "COMPILATION ERROR CODES"
|
.SH "COMPILATION ERROR CODES"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
|
@ -3421,6 +3488,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 20 April 2017
|
Last updated: 17 May 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2TEST 1 "18 April 2017" "PCRE 10.30"
|
.TH PCRE2TEST 1 "17 May 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||||
.SH SYNOPSIS
|
.SH SYNOPSIS
|
||||||
|
@ -519,17 +519,21 @@ by a previous \fB#pattern\fP command.
|
||||||
.SS "Setting compilation options"
|
.SS "Setting compilation options"
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
The following modifiers set options for \fBpcre2_compile()\fP. There are some
|
The following modifiers set options for \fBpcre2_compile()\fP. Most of them set
|
||||||
single-letter abbreviations that are the same as Perl options. There is special
|
bits in the options argument of that function, but those whose names start with
|
||||||
handling for /x: if a second x is present, PCRE2_EXTENDED is converted into
|
PCRE2_EXTRA are additional options that are set in the compile context. For the
|
||||||
PCRE2_EXTENDED_MORE as in Perl. A third appearance adds PCRE2_EXTENDED as well,
|
main options, there are some single-letter abbreviations that are the same as
|
||||||
though this makes no difference to the way \fBpcre2_compile()\fP behaves. See
|
Perl options. There is special handling for /x: if a second x is present,
|
||||||
|
PCRE2_EXTENDED is converted into PCRE2_EXTENDED_MORE as in Perl. A third
|
||||||
|
appearance adds PCRE2_EXTENDED as well, though this makes no difference to the
|
||||||
|
way \fBpcre2_compile()\fP behaves. See
|
||||||
.\" HREF
|
.\" HREF
|
||||||
\fBpcre2api\fP
|
\fBpcre2api\fP
|
||||||
.\"
|
.\"
|
||||||
for a description of the effects of these options.
|
for a description of the effects of these options.
|
||||||
.sp
|
.sp
|
||||||
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
allow_empty_class set PCRE2_ALLOW_EMPTY_CLASS
|
||||||
|
allow_surrogate_escapes set PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES
|
||||||
alt_bsux set PCRE2_ALT_BSUX
|
alt_bsux set PCRE2_ALT_BSUX
|
||||||
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
alt_circumflex set PCRE2_ALT_CIRCUMFLEX
|
||||||
alt_verbnames set PCRE2_ALT_VERBNAMES
|
alt_verbnames set PCRE2_ALT_VERBNAMES
|
||||||
|
@ -1788,6 +1792,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 18 April 2017
|
Last updated: 17 May 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
.TH PCRE2UNICODE 3 "20 April 2017" "PCRE2 10.30"
|
.TH PCRE2UNICODE 3 "17 May 2017" "PCRE2 10.30"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||||
.SH "UNICODE AND UTF SUPPORT"
|
.SH "UNICODE AND UTF SUPPORT"
|
||||||
|
@ -164,6 +164,14 @@ or \fBpcre2_dfa_match()\fP.
|
||||||
.P
|
.P
|
||||||
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
If you pass an invalid UTF string when PCRE2_NO_UTF_CHECK is set, the result
|
||||||
is undefined and your program may crash or loop indefinitely.
|
is undefined and your program may crash or loop indefinitely.
|
||||||
|
.P
|
||||||
|
Note that setting PCRE2_NO_UTF_CHECK at compile time does not disable the error
|
||||||
|
that is given if an escape sequence for an invalid Unicode code point is
|
||||||
|
encountered in the pattern. If you want to allow escape sequences such as
|
||||||
|
\ex{d800} (a surrogate code point) you can set the
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES extra option. However, this is possible
|
||||||
|
only in UTF-8 and UTF-32 modes, because these values are not representable in
|
||||||
|
UTF-16.
|
||||||
.
|
.
|
||||||
.
|
.
|
||||||
.\" HTML <a name="utf8strings"></a>
|
.\" HTML <a name="utf8strings"></a>
|
||||||
|
@ -272,6 +280,6 @@ Cambridge, England.
|
||||||
.rs
|
.rs
|
||||||
.sp
|
.sp
|
||||||
.nf
|
.nf
|
||||||
Last updated: 20 April 2017
|
Last updated: 17 May 2017
|
||||||
Copyright (c) 1997-2017 University of Cambridge.
|
Copyright (c) 1997-2017 University of Cambridge.
|
||||||
.fi
|
.fi
|
||||||
|
|
|
@ -139,6 +139,10 @@ D is inspected during pcre2_dfa_match() execution
|
||||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||||
|
|
||||||
|
/* An additional compile options word is available in the compile context. */
|
||||||
|
|
||||||
|
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||||
|
|
||||||
/* These are for pcre2_jit_compile(). */
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||||
|
@ -448,6 +452,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
@ -721,6 +727,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
|
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||||
|
|
|
@ -139,6 +139,10 @@ D is inspected during pcre2_dfa_match() execution
|
||||||
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
#define PCRE2_USE_OFFSET_LIMIT 0x00800000u /* J M D */
|
||||||
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
#define PCRE2_EXTENDED_MORE 0x01000000u /* C */
|
||||||
|
|
||||||
|
/* An additional compile options word is available in the compile context. */
|
||||||
|
|
||||||
|
#define PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES 0x00000001u /* C */
|
||||||
|
|
||||||
/* These are for pcre2_jit_compile(). */
|
/* These are for pcre2_jit_compile(). */
|
||||||
|
|
||||||
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
|
||||||
|
@ -448,6 +452,8 @@ PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
pcre2_set_bsr(pcre2_compile_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
pcre2_set_character_tables(pcre2_compile_context *, const unsigned char *); \
|
||||||
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *, uint32_t); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
pcre2_set_max_pattern_length(pcre2_compile_context *, PCRE2_SIZE); \
|
||||||
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
|
||||||
|
@ -721,6 +727,7 @@ pcre2_compile are called by application code. */
|
||||||
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
#define pcre2_set_bsr PCRE2_SUFFIX(pcre2_set_bsr_)
|
||||||
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
#define pcre2_set_callout PCRE2_SUFFIX(pcre2_set_callout_)
|
||||||
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
#define pcre2_set_character_tables PCRE2_SUFFIX(pcre2_set_character_tables_)
|
||||||
|
#define pcre2_set_compile_extra_options PCRE2_SUFFIX(pcre2_set_compile_extra_options_)
|
||||||
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
#define pcre2_set_compile_recursion_guard PCRE2_SUFFIX(pcre2_set_compile_recursion_guard_)
|
||||||
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
#define pcre2_set_depth_limit PCRE2_SUFFIX(pcre2_set_depth_limit_)
|
||||||
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
#define pcre2_set_glob_separator PCRE2_SUFFIX(pcre2_set_glob_separator_)
|
||||||
|
|
|
@ -717,7 +717,8 @@ enum { ERR0 = COMPILE_ERROR_BASE,
|
||||||
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60,
|
||||||
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
|
||||||
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
|
||||||
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90 };
|
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
|
||||||
|
ERR91};
|
||||||
|
|
||||||
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
/* This is a table of start-of-pattern options such as (*UTF) and settings such
|
||||||
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
|
||||||
|
@ -728,7 +729,7 @@ enum { PSO_OPT, /* Value is an option bit */
|
||||||
PSO_FLG, /* Value is a flag bit */
|
PSO_FLG, /* Value is a flag bit */
|
||||||
PSO_NL, /* Value is a newline type */
|
PSO_NL, /* Value is a newline type */
|
||||||
PSO_BSR, /* Value is a \R type */
|
PSO_BSR, /* Value is a \R type */
|
||||||
PSO_LIMH, /* Read integer value for heap limit */
|
PSO_LIMH, /* Read integer value for heap limit */
|
||||||
PSO_LIMM, /* Read integer value for match limit */
|
PSO_LIMM, /* Read integer value for match limit */
|
||||||
PSO_LIMD }; /* Read integer value for depth limit */
|
PSO_LIMD }; /* Read integer value for depth limit */
|
||||||
|
|
||||||
|
@ -1474,7 +1475,10 @@ else
|
||||||
if (utf)
|
if (utf)
|
||||||
{
|
{
|
||||||
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
||||||
else if (c >= 0xd800 && c <= 0xdfff) *errorcodeptr = ERR73;
|
else
|
||||||
|
if (c >= 0xd800 && c <= 0xdfff &&
|
||||||
|
(cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||||||
|
*errorcodeptr = ERR73;
|
||||||
}
|
}
|
||||||
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
|
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
|
||||||
}
|
}
|
||||||
|
@ -1663,7 +1667,8 @@ else
|
||||||
}
|
}
|
||||||
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
||||||
{
|
{
|
||||||
if (utf && c >= 0xd800 && c <= 0xdfff)
|
if (utf && c >= 0xd800 && c <= 0xdfff &&
|
||||||
|
(cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||||||
{
|
{
|
||||||
ptr--;
|
ptr--;
|
||||||
*errorcodeptr = ERR73;
|
*errorcodeptr = ERR73;
|
||||||
|
@ -1732,7 +1737,8 @@ else
|
||||||
}
|
}
|
||||||
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
||||||
{
|
{
|
||||||
if (utf && c >= 0xd800 && c <= 0xdfff)
|
if (utf && c >= 0xd800 && c <= 0xdfff &&
|
||||||
|
(cb->cx->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||||||
{
|
{
|
||||||
ptr--;
|
ptr--;
|
||||||
*errorcodeptr = ERR73;
|
*errorcodeptr = ERR73;
|
||||||
|
@ -2227,7 +2233,7 @@ typedef struct nest_save {
|
||||||
uint16_t reset_group;
|
uint16_t reset_group;
|
||||||
uint16_t max_group;
|
uint16_t max_group;
|
||||||
uint16_t flags;
|
uint16_t flags;
|
||||||
uint32_t options;
|
uint32_t options;
|
||||||
} nest_save;
|
} nest_save;
|
||||||
|
|
||||||
#define NSF_RESET 0x0001u
|
#define NSF_RESET 0x0001u
|
||||||
|
@ -2297,10 +2303,10 @@ creating a nest_save that spans the end of the workspace. */
|
||||||
|
|
||||||
end_nests = (nest_save *)((char *)end_nests -
|
end_nests = (nest_save *)((char *)end_nests -
|
||||||
((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save)));
|
((cb->workspace_size * sizeof(PCRE2_UCHAR)) % sizeof(nest_save)));
|
||||||
|
|
||||||
/* PCRE2_EXTENDED_MORE implies PCRE2_EXTENDED */
|
/* PCRE2_EXTENDED_MORE implies PCRE2_EXTENDED */
|
||||||
|
|
||||||
if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
|
if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
|
||||||
|
|
||||||
/* Now scan the pattern */
|
/* Now scan the pattern */
|
||||||
|
|
||||||
|
@ -2969,7 +2975,7 @@ while (ptr < ptrend)
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
BOOL char_is_literal = TRUE;
|
BOOL char_is_literal = TRUE;
|
||||||
|
|
||||||
/* Inside \Q...\E everything is literal except \E */
|
/* Inside \Q...\E everything is literal except \E */
|
||||||
|
|
||||||
if (inescq)
|
if (inescq)
|
||||||
|
@ -2982,11 +2988,11 @@ while (ptr < ptrend)
|
||||||
}
|
}
|
||||||
goto CLASS_LITERAL;
|
goto CLASS_LITERAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip over space and tab (only) in extended-more mode. */
|
/* Skip over space and tab (only) in extended-more mode. */
|
||||||
|
|
||||||
if ((options & PCRE2_EXTENDED_MORE) != 0 &&
|
if ((options & PCRE2_EXTENDED_MORE) != 0 &&
|
||||||
(c == CHAR_SPACE || c == CHAR_HT))
|
(c == CHAR_SPACE || c == CHAR_HT))
|
||||||
goto CLASS_CONTINUE;
|
goto CLASS_CONTINUE;
|
||||||
|
|
||||||
/* Handle POSIX class names. Perl allows a negation extension of the
|
/* Handle POSIX class names. Perl allows a negation extension of the
|
||||||
|
@ -3448,12 +3454,12 @@ while (ptr < ptrend)
|
||||||
case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
|
case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
|
||||||
case CHAR_s: *optset |= PCRE2_DOTALL; break;
|
case CHAR_s: *optset |= PCRE2_DOTALL; break;
|
||||||
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
|
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
|
||||||
|
|
||||||
/* If x appears twice it sets the extended-more option. */
|
/* If x appears twice it sets the extended-more option. */
|
||||||
|
|
||||||
case CHAR_x:
|
case CHAR_x:
|
||||||
*optset |= ((*optset & PCRE2_EXTENDED) != 0)?
|
*optset |= ((*optset & PCRE2_EXTENDED) != 0)?
|
||||||
PCRE2_EXTENDED_MORE : PCRE2_EXTENDED;
|
PCRE2_EXTENDED_MORE : PCRE2_EXTENDED;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -3463,10 +3469,10 @@ while (ptr < ptrend)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
options = (options | set) & (~unset);
|
options = (options | set) & (~unset);
|
||||||
|
|
||||||
/* Unsetting extended should also get rid of extended-more. */
|
/* Unsetting extended should also get rid of extended-more. */
|
||||||
|
|
||||||
if ((options & PCRE2_EXTENDED) == 0) options &= ~PCRE2_EXTENDED_MORE;
|
if ((options & PCRE2_EXTENDED) == 0) options &= ~PCRE2_EXTENDED_MORE;
|
||||||
|
|
||||||
/* If the options ended with ')' this is not the start of a nested
|
/* If the options ended with ')' this is not the start of a nested
|
||||||
group with option changes, so the options change at this level.
|
group with option changes, so the options change at this level.
|
||||||
|
@ -4190,18 +4196,18 @@ for (;;)
|
||||||
case OP_CALLOUT_STR:
|
case OP_CALLOUT_STR:
|
||||||
code += GET(code, 1 + 2*LINK_SIZE);
|
code += GET(code, 1 + 2*LINK_SIZE);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_SKIPZERO:
|
case OP_SKIPZERO:
|
||||||
code += 2 + GET(code, 2) + LINK_SIZE;
|
code += 2 + GET(code, 2) + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case OP_COND:
|
case OP_COND:
|
||||||
case OP_SCOND:
|
case OP_SCOND:
|
||||||
if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */
|
if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */
|
||||||
code[GET(code, 1)] != OP_KET) /* More than one branch */
|
code[GET(code, 1)] != OP_KET) /* More than one branch */
|
||||||
return code;
|
return code;
|
||||||
code += GET(code, 1) + 1 + LINK_SIZE;
|
code += GET(code, 1) + 1 + LINK_SIZE;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return code;
|
return code;
|
||||||
|
@ -8150,7 +8156,7 @@ uint32_t nestlevel = 0;
|
||||||
for (;; pptr++)
|
for (;; pptr++)
|
||||||
{
|
{
|
||||||
uint32_t meta = META_CODE(*pptr);
|
uint32_t meta = META_CODE(*pptr);
|
||||||
|
|
||||||
switch(meta)
|
switch(meta)
|
||||||
{
|
{
|
||||||
default: /* Just skip over most items */
|
default: /* Just skip over most items */
|
||||||
|
@ -8265,8 +8271,8 @@ int branchlength;
|
||||||
int grouplength = -1;
|
int grouplength = -1;
|
||||||
|
|
||||||
/* The cache can be used only if there is no possibility of there being two
|
/* The cache can be used only if there is no possibility of there being two
|
||||||
groups with the same number. We do not need to set the end pointer for a group
|
groups with the same number. We do not need to set the end pointer for a group
|
||||||
that is being processed as a back reference or recursion, but we must do so for
|
that is being processed as a back reference or recursion, but we must do so for
|
||||||
an inline group. */
|
an inline group. */
|
||||||
|
|
||||||
if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
|
if (group > 0 && (cb->external_flags & PCRE2_DUPCAPUSED) == 0)
|
||||||
|
@ -8438,7 +8444,7 @@ for (;; pptr++)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Lookaheads can be ignored, but we must start the skip inside the group
|
/* Lookaheads can be ignored, but we must start the skip inside the group
|
||||||
so that it isn't treated as a group within the branch. */
|
so that it isn't treated as a group within the branch. */
|
||||||
|
|
||||||
case META_LOOKAHEAD:
|
case META_LOOKAHEAD:
|
||||||
|
@ -8464,7 +8470,7 @@ for (;; pptr++)
|
||||||
case META_BACKREF_BYNAME:
|
case META_BACKREF_BYNAME:
|
||||||
if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0)
|
if ((cb->external_options & PCRE2_MATCH_UNSET_BACKREF) != 0)
|
||||||
goto ISNOTFIXED;
|
goto ISNOTFIXED;
|
||||||
/* Fall through */
|
/* Fall through */
|
||||||
|
|
||||||
case META_RECURSE_BYNAME:
|
case META_RECURSE_BYNAME:
|
||||||
{
|
{
|
||||||
|
@ -8542,7 +8548,7 @@ for (;; pptr++)
|
||||||
else if (*gptr == (META_CAPTURE | group)) break;
|
else if (*gptr == (META_CAPTURE | group)) break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We must start the search for the end of the group at the first meta code
|
/* We must start the search for the end of the group at the first meta code
|
||||||
inside the group. Otherwise it will be treated as an enclosed group. */
|
inside the group. Otherwise it will be treated as an enclosed group. */
|
||||||
|
|
||||||
gptrend = parsed_skip(gptr + 1, PSKIP_KET);
|
gptrend = parsed_skip(gptr + 1, PSKIP_KET);
|
||||||
|
@ -8552,12 +8558,12 @@ for (;; pptr++)
|
||||||
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
|
if (r != NULL) goto ISNOTFIXED; /* Mutual recursion */
|
||||||
this_recurse.prev = recurses;
|
this_recurse.prev = recurses;
|
||||||
this_recurse.groupptr = gptr;
|
this_recurse.groupptr = gptr;
|
||||||
|
|
||||||
/* We do not need to know the position of the end of the group, that is,
|
/* We do not need to know the position of the end of the group, that is,
|
||||||
gptr is not used after the call to get_grouplength(). Setting the second
|
gptr is not used after the call to get_grouplength(). Setting the second
|
||||||
argument FALSE stops it scanning for the end when the length can be found
|
argument FALSE stops it scanning for the end when the length can be found
|
||||||
in the cache. */
|
in the cache. */
|
||||||
|
|
||||||
gptr++;
|
gptr++;
|
||||||
grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
|
grouplength = get_grouplength(&gptr, FALSE, errcodeptr, lcptr, group,
|
||||||
&this_recurse, cb);
|
&this_recurse, cb);
|
||||||
|
@ -8596,7 +8602,7 @@ for (;; pptr++)
|
||||||
case META_NOCAPTURE:
|
case META_NOCAPTURE:
|
||||||
pptr++;
|
pptr++;
|
||||||
CHECK_GROUP:
|
CHECK_GROUP:
|
||||||
grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
|
grouplength = get_grouplength(&pptr, TRUE, errcodeptr, lcptr, group,
|
||||||
recurses, cb);
|
recurses, cb);
|
||||||
if (grouplength < 0) return -1;
|
if (grouplength < 0) return -1;
|
||||||
itemlength = grouplength;
|
itemlength = grouplength;
|
||||||
|
@ -9053,7 +9059,7 @@ while (patlen - skipatstart >= 2 &&
|
||||||
|
|
||||||
case PSO_LIMM:
|
case PSO_LIMM:
|
||||||
case PSO_LIMD:
|
case PSO_LIMD:
|
||||||
case PSO_LIMH:
|
case PSO_LIMH:
|
||||||
c = 0;
|
c = 0;
|
||||||
pp = skipatstart;
|
pp = skipatstart;
|
||||||
if (!IS_DIGIT(ptr[pp]))
|
if (!IS_DIGIT(ptr[pp]))
|
||||||
|
@ -9100,7 +9106,9 @@ if ((cb.external_options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Check UTF. We have the original options in 'options', with that value as
|
/* Check UTF. We have the original options in 'options', with that value as
|
||||||
modified by (*UTF) etc in cb->external_options. */
|
modified by (*UTF) etc in cb->external_options. The extra option
|
||||||
|
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not permitted in UTF-16 mode because the
|
||||||
|
surrogate code points cannot be represented in UTF-16. */
|
||||||
|
|
||||||
utf = (cb.external_options & PCRE2_UTF) != 0;
|
utf = (cb.external_options & PCRE2_UTF) != 0;
|
||||||
if (utf)
|
if (utf)
|
||||||
|
@ -9113,6 +9121,14 @@ if (utf)
|
||||||
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
if ((options & PCRE2_NO_UTF_CHECK) == 0 &&
|
||||||
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
(errorcode = PRIV(valid_utf)(pattern, patlen, erroroffset)) != 0)
|
||||||
goto HAD_ERROR; /* Offset was set by valid_utf() */
|
goto HAD_ERROR; /* Offset was set by valid_utf() */
|
||||||
|
|
||||||
|
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||||
|
if ((ccontext->extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)
|
||||||
|
{
|
||||||
|
errorcode = ERR91;
|
||||||
|
goto HAD_EARLY_ERROR;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check UCP lockout. */
|
/* Check UCP lockout. */
|
||||||
|
@ -9299,7 +9315,7 @@ possible because nowadays we limit the maximum value of cb.names_found and
|
||||||
cb.name_entry_size. */
|
cb.name_entry_size. */
|
||||||
|
|
||||||
re_blocksize = sizeof(pcre2_real_code) +
|
re_blocksize = sizeof(pcre2_real_code) +
|
||||||
CU2BYTES(length +
|
CU2BYTES(length +
|
||||||
(PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);
|
(PCRE2_SIZE)cb.names_found * (PCRE2_SIZE)cb.name_entry_size);
|
||||||
re = (pcre2_real_code *)
|
re = (pcre2_real_code *)
|
||||||
ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data);
|
ccontext->memctl.malloc(re_blocksize, ccontext->memctl.memory_data);
|
||||||
|
@ -9308,11 +9324,11 @@ if (re == NULL)
|
||||||
errorcode = ERR21;
|
errorcode = ERR21;
|
||||||
goto HAD_CB_ERROR;
|
goto HAD_CB_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The compiler may put padding at the end of the pcre2_real_code structure in
|
/* The compiler may put padding at the end of the pcre2_real_code structure in
|
||||||
order to round it up to a multiple of 4 or 8 bytes. This means that when a
|
order to round it up to a multiple of 4 or 8 bytes. This means that when a
|
||||||
compiled pattern is copied (for example, when serialized) undefined bytes are
|
compiled pattern is copied (for example, when serialized) undefined bytes are
|
||||||
read, and this annoys debuggers such as valgrind. To avoid this, we explicitly
|
read, and this annoys debuggers such as valgrind. To avoid this, we explicitly
|
||||||
write to the last 8 bytes of the structure before setting the fields. */
|
write to the last 8 bytes of the structure before setting the fields. */
|
||||||
|
|
||||||
memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8);
|
memset((char *)re + sizeof(pcre2_real_code) - 8, 0, 8);
|
||||||
|
|
|
@ -138,7 +138,8 @@ const pcre2_compile_context PRIV(default_compile_context) = {
|
||||||
PCRE2_UNSET, /* Max pattern length */
|
PCRE2_UNSET, /* Max pattern length */
|
||||||
BSR_DEFAULT, /* Backslash R default */
|
BSR_DEFAULT, /* Backslash R default */
|
||||||
NEWLINE_DEFAULT, /* Newline convention */
|
NEWLINE_DEFAULT, /* Newline convention */
|
||||||
PARENS_NEST_LIMIT }; /* As it says */
|
PARENS_NEST_LIMIT, /* As it says */
|
||||||
|
0 }; /* Extra options */
|
||||||
|
|
||||||
/* The create function copies the default into the new memory, but must
|
/* The create function copies the default into the new memory, but must
|
||||||
override the default memory handling functions if a gcontext was provided. */
|
override the default memory handling functions if a gcontext was provided. */
|
||||||
|
@ -168,7 +169,7 @@ const pcre2_match_context PRIV(default_match_context) = {
|
||||||
NULL,
|
NULL,
|
||||||
NULL,
|
NULL,
|
||||||
PCRE2_UNSET, /* Offset limit */
|
PCRE2_UNSET, /* Offset limit */
|
||||||
HEAP_LIMIT,
|
HEAP_LIMIT,
|
||||||
MATCH_LIMIT,
|
MATCH_LIMIT,
|
||||||
MATCH_LIMIT_DEPTH };
|
MATCH_LIMIT_DEPTH };
|
||||||
|
|
||||||
|
@ -197,7 +198,7 @@ const pcre2_convert_context PRIV(default_convert_context) = {
|
||||||
CHAR_BACKSLASH /* Default path separator */
|
CHAR_BACKSLASH /* Default path separator */
|
||||||
#else /* is OS dependent */
|
#else /* is OS dependent */
|
||||||
CHAR_SLASH /* Not Windows */
|
CHAR_SLASH /* Not Windows */
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The create function copies the default into the new memory, but must
|
/* The create function copies the default into the new memory, but must
|
||||||
|
@ -371,6 +372,13 @@ ccontext->parens_nest_limit = limit;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
pcre2_set_compile_extra_options(pcre2_compile_context *ccontext, uint32_t options)
|
||||||
|
{
|
||||||
|
ccontext->extra_options = options;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
pcre2_set_compile_recursion_guard(pcre2_compile_context *ccontext,
|
||||||
int (*guard)(uint32_t, void *), void *user_data)
|
int (*guard)(uint32_t, void *), void *user_data)
|
||||||
|
@ -420,7 +428,7 @@ mcontext->offset_limit = limit;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This function became obsolete at release 10.30. It is kept as a no-op for
|
/* This function became obsolete at release 10.30. It is kept as a no-op for
|
||||||
backwards compatibility. */
|
backwards compatibility. */
|
||||||
|
|
||||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||||
|
@ -448,3 +456,4 @@ return 0;
|
||||||
|
|
||||||
|
|
||||||
/* End of pcre2_context.c */
|
/* End of pcre2_context.c */
|
||||||
|
|
||||||
|
|
|
@ -176,6 +176,7 @@ static const unsigned char compile_error_texts[] =
|
||||||
"internal error: unknown code in parsed pattern\0"
|
"internal error: unknown code in parsed pattern\0"
|
||||||
/* 90 */
|
/* 90 */
|
||||||
"internal error: bad code value in parsed_skip()\0"
|
"internal error: bad code value in parsed_skip()\0"
|
||||||
|
"PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Match-time and UTF error texts are in the same format. */
|
/* Match-time and UTF error texts are in the same format. */
|
||||||
|
|
|
@ -572,6 +572,7 @@ typedef struct pcre2_real_compile_context {
|
||||||
uint16_t bsr_convention;
|
uint16_t bsr_convention;
|
||||||
uint16_t newline_convention;
|
uint16_t newline_convention;
|
||||||
uint32_t parens_nest_limit;
|
uint32_t parens_nest_limit;
|
||||||
|
uint32_t extra_options;
|
||||||
} pcre2_real_compile_context;
|
} pcre2_real_compile_context;
|
||||||
|
|
||||||
/* The real match context structure. */
|
/* The real match context structure. */
|
||||||
|
|
|
@ -194,6 +194,7 @@ void vms_setsymbol( char *, char *, int );
|
||||||
#define LOCALESIZE 32 /* Size of locale name */
|
#define LOCALESIZE 32 /* Size of locale name */
|
||||||
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||||
#define MALLOCLISTSIZE 20 /* For remembering mallocs */
|
#define MALLOCLISTSIZE 20 /* For remembering mallocs */
|
||||||
|
#define PARENS_NEST_DEFAULT 220 /* Default parentheses nest limit */
|
||||||
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
|
#define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */
|
||||||
#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
|
#define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */
|
||||||
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||||
|
@ -577,6 +578,7 @@ static modstruct modlist[] = {
|
||||||
{ "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
|
{ "allaftertext", MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT, PO(control) },
|
||||||
{ "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
|
{ "allcaptures", MOD_PND, MOD_CTL, CTL_ALLCAPTURES, PO(control) },
|
||||||
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
|
{ "allow_empty_class", MOD_PAT, MOD_OPT, PCRE2_ALLOW_EMPTY_CLASS, PO(options) },
|
||||||
|
{ "allow_surrogate_escapes", MOD_CTC, MOD_OPT, PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES, CO(extra_options) },
|
||||||
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
|
{ "allusedtext", MOD_PNDP, MOD_CTL, CTL_ALLUSEDTEXT, PO(control) },
|
||||||
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
{ "alt_bsux", MOD_PAT, MOD_OPT, PCRE2_ALT_BSUX, PO(options) },
|
||||||
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
|
{ "alt_circumflex", MOD_PAT, MOD_OPT, PCRE2_ALT_CIRCUMFLEX, PO(options) },
|
||||||
|
@ -685,6 +687,8 @@ static modstruct modlist[] = {
|
||||||
#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
|
#define POSIX_SUPPORTED_COMPILE_OPTIONS ( \
|
||||||
PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
|
PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE|PCRE2_UCP|PCRE2_UTF| \
|
||||||
PCRE2_UNGREEDY)
|
PCRE2_UNGREEDY)
|
||||||
|
|
||||||
|
#define POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS (0)
|
||||||
|
|
||||||
#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
|
#define POSIX_SUPPORTED_COMPILE_CONTROLS ( \
|
||||||
CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)
|
CTL_AFTERTEXT|CTL_ALLAFTERTEXT|CTL_EXPAND|CTL_POSIX|CTL_POSIX_NOSUB)
|
||||||
|
@ -4025,6 +4029,32 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*************************************************
|
||||||
|
* Show compile extra options *
|
||||||
|
*************************************************/
|
||||||
|
|
||||||
|
/* Called to show extra compile options that the POSIX wrapper does not support.
|
||||||
|
|
||||||
|
Arguments:
|
||||||
|
options an options word
|
||||||
|
before text to print before
|
||||||
|
after text to print after
|
||||||
|
|
||||||
|
Returns: nothing
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
show_compile_extra_options(uint32_t options, const char *before,
|
||||||
|
const char *after)
|
||||||
|
{
|
||||||
|
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
|
||||||
|
else fprintf(outfile, "%s%s%s",
|
||||||
|
before,
|
||||||
|
((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
|
||||||
|
after);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef SUPPORT_PCRE2_8
|
#ifdef SUPPORT_PCRE2_8
|
||||||
/*************************************************
|
/*************************************************
|
||||||
|
@ -5161,6 +5191,16 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
|
pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS, msg, "");
|
||||||
msg = "";
|
msg = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((FLD(pat_context, extra_options) &
|
||||||
|
~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS) != 0)
|
||||||
|
{
|
||||||
|
show_compile_extra_options(
|
||||||
|
FLD(pat_context, extra_options) & ~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS,
|
||||||
|
msg, "");
|
||||||
|
msg = "";
|
||||||
|
}
|
||||||
|
|
||||||
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
|
if ((pat_patctl.control & ~POSIX_SUPPORTED_COMPILE_CONTROLS) != 0 ||
|
||||||
(pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
|
(pat_patctl.control2 & ~POSIX_SUPPORTED_COMPILE_CONTROLS2) != 0)
|
||||||
{
|
{
|
||||||
|
@ -5170,7 +5210,11 @@ if ((pat_patctl.control & CTL_POSIX) != 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
|
if (local_newline_default != 0) prmsg(&msg, "#newline_default");
|
||||||
|
if (FLD(pat_context, max_pattern_length) != PCRE2_UNSET)
|
||||||
|
prmsg(&msg, "max_pattern_length");
|
||||||
|
if (FLD(pat_context, parens_nest_limit) != PARENS_NEST_DEFAULT)
|
||||||
|
prmsg(&msg, "parens_nest_limit");
|
||||||
|
|
||||||
if (msg[0] == 0) fprintf(outfile, "\n");
|
if (msg[0] == 0) fprintf(outfile, "\n");
|
||||||
|
|
||||||
/* Translate PCRE2 options to POSIX options and then compile. */
|
/* Translate PCRE2 options to POSIX options and then compile. */
|
||||||
|
@ -8123,6 +8167,7 @@ max_oveccount = DEFAULT_OVECCOUNT;
|
||||||
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
|
G(match_data,BITS) = G(pcre2_match_data_create_,BITS)(max_oveccount, G(general_context,BITS))
|
||||||
|
|
||||||
#define CONTEXTTESTS \
|
#define CONTEXTTESTS \
|
||||||
|
(void)G(pcre2_set_compile_extra_options_,BITS)(G(pat_context,BITS), 0); \
|
||||||
(void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
|
(void)G(pcre2_set_max_pattern_length_,BITS)(G(pat_context,BITS), 0); \
|
||||||
(void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
|
(void)G(pcre2_set_offset_limit_,BITS)(G(dat_context,BITS), 0); \
|
||||||
(void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
|
(void)G(pcre2_set_recursion_memory_management_,BITS)(G(dat_context,BITS), my_malloc, my_free, NULL)
|
||||||
|
@ -8163,7 +8208,7 @@ if (test_mode == PCRE32_MODE)
|
||||||
/* Set a default parentheses nest limit that is large enough to run the
|
/* Set a default parentheses nest limit that is large enough to run the
|
||||||
standard tests (this also exercises the function). */
|
standard tests (this also exercises the function). */
|
||||||
|
|
||||||
PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, 220);
|
PCRE2_SET_PARENS_NEST_LIMIT(default_pat_context, PARENS_NEST_DEFAULT);
|
||||||
|
|
||||||
/* Handle command line modifier settings, sending any error messages to
|
/* Handle command line modifier settings, sending any error messages to
|
||||||
stderr. We need to know the mode before modifying the context, and it is tidier
|
stderr. We need to know the mode before modifying the context, and it is tidier
|
||||||
|
|
|
@ -458,4 +458,13 @@
|
||||||
|
|
||||||
/[\s[:^ascii:]]/B,ucp
|
/[\s[:^ascii:]]/B,ucp
|
||||||
|
|
||||||
|
# A special extra option allows escaped surrogate code points in 8-bit mode,
|
||||||
|
# but subjects containing them must not be UTF-checked.
|
||||||
|
|
||||||
|
/\x{d800}/utf,allow_surrogate_escapes
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
|
||||||
|
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||||
|
\x{dfff}\x{df01}\=no_utf_check
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -363,4 +363,14 @@
|
||||||
/\pP/ucp
|
/\pP/ucp
|
||||||
\x{7fffffff}
|
\x{7fffffff}
|
||||||
|
|
||||||
|
# A special extra option allows escaped surrogate code points in 32-bit mode,
|
||||||
|
# but subjects containing them must not be UTF-checked. These patterns give
|
||||||
|
# errors in 16-bit mode.
|
||||||
|
|
||||||
|
/\x{d800}/utf,allow_surrogate_escapes
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
|
||||||
|
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||||
|
\x{dfff}\x{df01}\=no_utf_check
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
#forbid_utf
|
#forbid_utf
|
||||||
#pattern posix
|
#pattern posix
|
||||||
|
|
||||||
# Test invalid options
|
# Test some invalid options
|
||||||
|
|
||||||
/abc/auto_callout
|
/abc/auto_callout
|
||||||
|
|
||||||
|
@ -14,6 +14,10 @@
|
||||||
|
|
||||||
/abc/
|
/abc/
|
||||||
abc\=partial_hard
|
abc\=partial_hard
|
||||||
|
|
||||||
|
/a(())bc/parens_nest_limit=1
|
||||||
|
|
||||||
|
/abc/allow_surrogate_escapes,max_pattern_length=2
|
||||||
|
|
||||||
# Real tests
|
# Real tests
|
||||||
|
|
||||||
|
|
|
@ -1575,4 +1575,15 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
|
||||||
End
|
End
|
||||||
------------------------------------------------------------------
|
------------------------------------------------------------------
|
||||||
|
|
||||||
|
# A special extra option allows escaped surrogate code points in 8-bit mode,
|
||||||
|
# but subjects containing them must not be UTF-checked.
|
||||||
|
|
||||||
|
/\x{d800}/utf,allow_surrogate_escapes
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
0: \x{d800}
|
||||||
|
|
||||||
|
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||||
|
\x{dfff}\x{df01}\=no_utf_check
|
||||||
|
0: \x{dfff}\x{df01}
|
||||||
|
|
||||||
# End of testinput10
|
# End of testinput10
|
||||||
|
|
|
@ -1421,4 +1421,16 @@ No match
|
||||||
** Truncation will probably give the wrong result.
|
** Truncation will probably give the wrong result.
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# A special extra option allows escaped surrogate code points in 32-bit mode,
|
||||||
|
# but subjects containing them must not be UTF-checked. These patterns give
|
||||||
|
# errors in 16-bit mode.
|
||||||
|
|
||||||
|
/\x{d800}/utf,allow_surrogate_escapes
|
||||||
|
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
|
||||||
|
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||||
|
Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
|
||||||
|
\x{dfff}\x{df01}\=no_utf_check
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -1413,4 +1413,16 @@ No match
|
||||||
\x{7fffffff}
|
\x{7fffffff}
|
||||||
No match
|
No match
|
||||||
|
|
||||||
|
# A special extra option allows escaped surrogate code points in 32-bit mode,
|
||||||
|
# but subjects containing them must not be UTF-checked. These patterns give
|
||||||
|
# errors in 16-bit mode.
|
||||||
|
|
||||||
|
/\x{d800}/utf,allow_surrogate_escapes
|
||||||
|
\x{d800}\=no_utf_check
|
||||||
|
0: \x{d800}
|
||||||
|
|
||||||
|
/\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
|
||||||
|
\x{dfff}\x{df01}\=no_utf_check
|
||||||
|
0: \x{dfff}\x{df01}
|
||||||
|
|
||||||
# End of testinput12
|
# End of testinput12
|
||||||
|
|
|
@ -15970,7 +15970,6 @@ Error -2: partial match
|
||||||
Error -1: no match
|
Error -1: no match
|
||||||
Error 0: PCRE2_ERROR_BADDATA (unknown error number)
|
Error 0: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error 100: no error
|
Error 100: no error
|
||||||
Error 188: pattern string is longer than the limit set by the application
|
Error 101: \ at end of pattern
|
||||||
Error 189: internal error: unknown code in parsed pattern
|
Error 191: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
|
||||||
Error 190: internal error: bad code value in parsed_skip()
|
Error 192: PCRE2_ERROR_BADDATA (unknown error number)
|
||||||
Error 191: PCRE2_ERROR_BADDATA (unknown error number)
|
|
||||||
|
|
Loading…
Reference in New Issue