Define names for compiler error codes and use them in pcre2_convert.c
This commit is contained in:
parent
5e3a1da503
commit
897d547046
|
@ -19,6 +19,9 @@ PCRE2_CONFIG_COMPILED_WIDTHS.
|
|||
5. Cut out \C tests in the JIT regression tests when NEVER_BACKSLASH_C is
|
||||
defined (e.g. by --enable-never-backslash-C).
|
||||
|
||||
6. Defined public names for all the pcre2_compile() error numbers, and used
|
||||
the public names in pcre2_convert.c.
|
||||
|
||||
|
||||
Version 10.30 14-August-2017
|
||||
----------------------------
|
||||
|
|
|
@ -45,7 +45,7 @@ can skip ahead to the CMake section.
|
|||
macro settings that it contains to whatever is appropriate for your
|
||||
environment. In particular, you can alter the definition of the NEWLINE
|
||||
macro to specify what character(s) you want to be interpreted as line
|
||||
terminators.
|
||||
terminators by default.
|
||||
|
||||
When you compile any of the PCRE2 modules, you must specify
|
||||
-DHAVE_CONFIG_H to your compiler so that src/config.h is included in the
|
||||
|
@ -91,8 +91,10 @@ can skip ahead to the CMake section.
|
|||
pcre2_compile.c
|
||||
pcre2_config.c
|
||||
pcre2_context.c
|
||||
pcre2_convert.c
|
||||
pcre2_dfa_match.c
|
||||
pcre2_error.c
|
||||
pcre2_extuni.c
|
||||
pcre2_find_bracket.c
|
||||
pcre2_jit_compile.c
|
||||
pcre2_maketables.c
|
||||
|
@ -119,10 +121,14 @@ can skip ahead to the CMake section.
|
|||
Note that you must compile pcre2_jit_compile.c, even if you have not
|
||||
defined SUPPORT_JIT in src/config.h, because when JIT support is not
|
||||
configured, dummy functions are compiled. When JIT support IS configured,
|
||||
pcre2_compile.c #includes other files from the sljit subdirectory, where
|
||||
there should be 16 files, all of whose names begin with "sljit". It also
|
||||
#includes src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should
|
||||
not compile these yourself.
|
||||
pcre2_jit_compile.c #includes other files from the sljit subdirectory,
|
||||
all of whose names begin with "sljit". It also #includes
|
||||
src/pcre2_jit_match.c and src/pcre2_jit_misc.c, so you should not compile
|
||||
these yourself.
|
||||
|
||||
Note also that the pcre2_fuzzsupport.c file contains special code that is
|
||||
useful to those who want to run fuzzing tests on the PCRE2 library. Unless
|
||||
you are doing that, you can ignore it.
|
||||
|
||||
(5) Now link all the compiled code into an object library in whichever form
|
||||
your system keeps such libraries. This is the basic PCRE2 C 8-bit library.
|
||||
|
@ -363,18 +369,19 @@ BUILDING PCRE2 ON NATIVE Z/OS AND Z/VM
|
|||
z/OS and z/VM are operating systems for mainframe computers, produced by IBM.
|
||||
The character code used is EBCDIC, not ASCII or Unicode. In z/OS, UNIX APIs and
|
||||
applications can be supported through UNIX System Services, and in such an
|
||||
environment PCRE2 can be built in the same way as in other systems. However, in
|
||||
native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see this web site:
|
||||
environment it should be possible to build PCRE2 in the same way as in other
|
||||
systems, with the EBCDIC related configuration settings, but it is not known if
|
||||
anybody has tried this.
|
||||
|
||||
http://www.zaconsultants.net
|
||||
In native z/OS (without UNIX System Services) and in z/VM, special ports are
|
||||
required. For details, please see file 939 on this web site:
|
||||
|
||||
The site currently has ports for PCRE1 releases, but PCRE2 should follow in due
|
||||
course.
|
||||
http://www.cbttape.org
|
||||
|
||||
You may also download PCRE1 from WWW.CBTTAPE.ORG, file 882. Everything, source
|
||||
and executable, is in EBCDIC and native z/OS file formats and this is the
|
||||
recommended download site.
|
||||
Everything in that location, source and executable, is in EBCDIC and native
|
||||
z/OS file formats. The port provides an API for LE languages such as COBOL and
|
||||
for the z/OS and z/VM versions of the Rexx languages.
|
||||
|
||||
=============================
|
||||
Last Updated: 17 March 2017
|
||||
===============================
|
||||
Last Updated: 13 September 2017
|
||||
===============================
|
||||
|
|
|
@ -773,6 +773,7 @@ The distribution should contain the files listed below.
|
|||
src/pcre2_convert.c )
|
||||
src/pcre2_dfa_match.c )
|
||||
src/pcre2_error.c )
|
||||
src/pcre2_extuni.c )
|
||||
src/pcre2_find_bracket.c )
|
||||
src/pcre2_jit_compile.c )
|
||||
src/pcre2_jit_match.c ) sources for the functions in the library,
|
||||
|
@ -882,4 +883,4 @@ The distribution should contain the files listed below.
|
|||
Philip Hazel
|
||||
Email local part: ph10
|
||||
Email domain: cam.ac.uk
|
||||
Last updated: 18 July 2017
|
||||
Last updated: 12 September 2017
|
||||
|
|
|
@ -45,12 +45,14 @@ point to a uint32_t integer variable. The available codes are:
|
|||
PCRE2_CONFIG_BSR Indicates what \R matches by default:
|
||||
PCRE2_BSR_UNICODE
|
||||
PCRE2_BSR_ANYCRLF
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_COMPILED_WIDTHS Which of 8/16/32 support was compiled
|
||||
PCRE2_CONFIG_DEPTHLIMIT Default backtracking depth limit
|
||||
PCRE2_CONFIG_HEAPLIMIT Default heap memory limit
|
||||
PCRE2_CONFIG_JIT Availability of just-in-time compiler support (1=yes 0=no)
|
||||
PCRE2_CONFIG_JITTARGET Information (a string) about the target architecture for the JIT compiler
|
||||
PCRE2_CONFIG_LINKSIZE Configured internal link size (2, 3, 4)
|
||||
PCRE2_CONFIG_MATCHLIMIT Default internal resource limit
|
||||
PCRE2_CONFIG_NEVER_BACKSLASH_C Whether or not \C is disabled
|
||||
PCRE2_CONFIG_NEWLINE Code for the default newline sequence:
|
||||
PCRE2_NEWLINE_CR
|
||||
PCRE2_NEWLINE_LF
|
||||
|
|
|
@ -33,29 +33,28 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC18" href="#SEC18">PCRE2 CONTEXTS</a>
|
||||
<li><a name="TOC19" href="#SEC19">CHECKING BUILD-TIME OPTIONS</a>
|
||||
<li><a name="TOC20" href="#SEC20">COMPILING A PATTERN</a>
|
||||
<li><a name="TOC21" href="#SEC21">COMPILATION ERROR CODES</a>
|
||||
<li><a name="TOC22" href="#SEC22">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||
<li><a name="TOC23" href="#SEC23">LOCALE SUPPORT</a>
|
||||
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||
<li><a name="TOC25" href="#SEC25">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
|
||||
<li><a name="TOC26" href="#SEC26">SERIALIZATION AND PRECOMPILING</a>
|
||||
<li><a name="TOC27" href="#SEC27">THE MATCH DATA BLOCK</a>
|
||||
<li><a name="TOC28" href="#SEC28">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||
<li><a name="TOC29" href="#SEC29">NEWLINE HANDLING WHEN MATCHING</a>
|
||||
<li><a name="TOC30" href="#SEC30">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC31" href="#SEC31">OTHER INFORMATION ABOUT A MATCH</a>
|
||||
<li><a name="TOC32" href="#SEC32">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||
<li><a name="TOC33" href="#SEC33">OBTAINING A TEXTUAL ERROR MESSAGE</a>
|
||||
<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC35" href="#SEC35">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC36" href="#SEC36">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC37" href="#SEC37">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC38" href="#SEC38">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC39" href="#SEC39">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC40" href="#SEC40">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC41" href="#SEC41">SEE ALSO</a>
|
||||
<li><a name="TOC42" href="#SEC42">AUTHOR</a>
|
||||
<li><a name="TOC43" href="#SEC43">REVISION</a>
|
||||
<li><a name="TOC21" href="#SEC21">JUST-IN-TIME (JIT) COMPILATION</a>
|
||||
<li><a name="TOC22" href="#SEC22">LOCALE SUPPORT</a>
|
||||
<li><a name="TOC23" href="#SEC23">INFORMATION ABOUT A COMPILED PATTERN</a>
|
||||
<li><a name="TOC24" href="#SEC24">INFORMATION ABOUT A PATTERN'S CALLOUTS</a>
|
||||
<li><a name="TOC25" href="#SEC25">SERIALIZATION AND PRECOMPILING</a>
|
||||
<li><a name="TOC26" href="#SEC26">THE MATCH DATA BLOCK</a>
|
||||
<li><a name="TOC27" href="#SEC27">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
|
||||
<li><a name="TOC28" href="#SEC28">NEWLINE HANDLING WHEN MATCHING</a>
|
||||
<li><a name="TOC29" href="#SEC29">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC30" href="#SEC30">OTHER INFORMATION ABOUT A MATCH</a>
|
||||
<li><a name="TOC31" href="#SEC31">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||
<li><a name="TOC32" href="#SEC32">OBTAINING A TEXTUAL ERROR MESSAGE</a>
|
||||
<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC34" href="#SEC34">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC35" href="#SEC35">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC36" href="#SEC36">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC37" href="#SEC37">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC38" href="#SEC38">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC39" href="#SEC39">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC40" href="#SEC40">SEE ALSO</a>
|
||||
<li><a name="TOC41" href="#SEC41">AUTHOR</a>
|
||||
<li><a name="TOC42" href="#SEC42">REVISION</a>
|
||||
</ul>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
|
@ -1082,6 +1081,12 @@ sequences the \R escape sequence matches by default. A value of
|
|||
PCRE2_BSR_UNICODE means that \R matches any Unicode line ending sequence; a
|
||||
value of PCRE2_BSR_ANYCRLF means that \R matches only CR, LF, or CRLF. The
|
||||
default can be overridden when a pattern is compiled.
|
||||
<pre>
|
||||
PCRE2_CONFIG_COMPILED_WIDTHS
|
||||
</pre>
|
||||
The output is a uint32_t integer whose lower bits indicate which code unit
|
||||
widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support,
|
||||
and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively.
|
||||
<pre>
|
||||
PCRE2_CONFIG_DEPTHLIMIT
|
||||
</pre>
|
||||
|
@ -1148,6 +1153,11 @@ sequence that is recognized as meaning "newline". The values are:
|
|||
</pre>
|
||||
The default should normally correspond to the standard sequence for your
|
||||
operating system.
|
||||
<pre>
|
||||
PCRE2_CONFIG_NEVER_BACKSLASH_C
|
||||
</pre>
|
||||
The output is a uint32_t integer that is set to one if the use of \C was
|
||||
permanently disabled when PCRE2 was built; otherwise it is set to zero.
|
||||
<pre>
|
||||
PCRE2_CONFIG_PARENSLIMIT
|
||||
</pre>
|
||||
|
@ -1204,8 +1214,8 @@ zero.
|
|||
</P>
|
||||
<P>
|
||||
The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
||||
The pattern is defined by a pointer to a string of code units and a length. If
|
||||
the pattern is zero-terminated, the length can be specified as
|
||||
The pattern is defined by a pointer to a string of code units and a length (in
|
||||
code units). If the pattern is zero-terminated, the length can be specified as
|
||||
PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
|
||||
contains the compiled pattern and related data, or NULL if an error occurred.
|
||||
</P>
|
||||
|
@ -1278,20 +1288,26 @@ error has occurred. The values are not defined when compilation is successful
|
|||
and <b>pcre2_compile()</b> returns a non-NULL value.
|
||||
</P>
|
||||
<P>
|
||||
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
|
||||
if it finds an error in the pattern. There are also some negative error codes
|
||||
that are used for invalid UTF strings. These are the same as given by
|
||||
<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and are described in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page. There is no separate documentation for the positive error codes, because
|
||||
the textual error messages that are obtained by calling the
|
||||
<b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
||||
message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
|
||||
for both positive and negative error codes in <b>pcre2.h</b>.
|
||||
</P>
|
||||
<P>
|
||||
The value returned in <i>erroroffset</i> is an indication of where in the
|
||||
pattern the error occurred. It is not necessarily the furthest point in the
|
||||
pattern that was read. For example, after the error "lookbehind assertion is
|
||||
not fixed length", the error offset points to the start of the failing
|
||||
assertion.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
||||
message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
provides a textual message for each error code. Compilation errors have
|
||||
positive error codes; UTF formatting error codes are negative. For an invalid
|
||||
UTF-8 or UTF-16 string, the offset is that of the first code unit of the
|
||||
failing character.
|
||||
assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
|
||||
first code unit of the failing character.
|
||||
</P>
|
||||
<P>
|
||||
Some errors are not detected until the whole pattern has been scanned; in these
|
||||
|
@ -1792,21 +1808,8 @@ and the end. This is achieved by automatically inserting the code for "\b(?:"
|
|||
at the start of the compiled pattern and ")\b" at the end. The option may be
|
||||
used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
|
||||
also set.
|
||||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||
<P>
|
||||
There are nearly 100 positive error codes that <b>pcre2_compile()</b> may return
|
||||
(via <i>errorcode</i>) if it finds an error in the pattern. There are also some
|
||||
negative error codes that are used for invalid UTF strings. These are the same
|
||||
as given by <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and are described
|
||||
in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page. The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual
|
||||
error message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
can be called to obtain a textual error message from any error code.
|
||||
<a name="jitcompiling"></a></P>
|
||||
<br><a name="SEC22" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<br><a name="SEC21" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_jit_compile(pcre2_code *<i>code</i>, uint32_t <i>options</i>);</b>
|
||||
<br>
|
||||
|
@ -1844,7 +1847,7 @@ patterns to be analyzed, and for one-off matches and simple patterns the
|
|||
benefit of faster execution might be offset by a much slower compilation time.
|
||||
Most (but not all) patterns can be optimized by the JIT compiler.
|
||||
<a name="localesupport"></a></P>
|
||||
<br><a name="SEC23" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||
<br><a name="SEC22" href="#TOC1">LOCALE SUPPORT</a><br>
|
||||
<P>
|
||||
PCRE2 handles caseless matching, and determines whether characters are letters,
|
||||
digits, or whatever, by reference to a set of tables, indexed by character code
|
||||
|
@ -1900,7 +1903,7 @@ is saved with the compiled pattern, and the same tables are used by
|
|||
compilation and matching both happen in the same locale, but different patterns
|
||||
can be processed in different locales.
|
||||
<a name="infoaboutpattern"></a></P>
|
||||
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||
<br><a name="SEC23" href="#TOC1">INFORMATION ABOUT A COMPILED PATTERN</a><br>
|
||||
<P>
|
||||
<b>int pcre2_pattern_info(const pcre2_code *<i>code</i>, uint32_t <i>what</i>, void *<i>where</i>);</b>
|
||||
</P>
|
||||
|
@ -2225,7 +2228,7 @@ value returned by this option, because there are cases where the code that
|
|||
calculates the size has to over-estimate. Processing a pattern with the JIT
|
||||
compiler does not alter the value returned by this option.
|
||||
<a name="infoaboutcallouts"></a></P>
|
||||
<br><a name="SEC25" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
|
||||
<br><a name="SEC24" href="#TOC1">INFORMATION ABOUT A PATTERN'S CALLOUTS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_callout_enumerate(const pcre2_code *<i>code</i>,</b>
|
||||
<b> int (*<i>callback</i>)(pcre2_callout_enumerate_block *, void *),</b>
|
||||
|
@ -2244,7 +2247,7 @@ contents of the callout enumeration block are described in the
|
|||
<a href="pcre2callout.html"><b>pcre2callout</b></a>
|
||||
documentation, which also gives further details about callouts.
|
||||
</P>
|
||||
<br><a name="SEC26" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
||||
<br><a name="SEC25" href="#TOC1">SERIALIZATION AND PRECOMPILING</a><br>
|
||||
<P>
|
||||
It is possible to save compiled patterns on disc or elsewhere, and reload them
|
||||
later, subject to a number of restrictions. The functions whose names begin
|
||||
|
@ -2253,7 +2256,7 @@ the
|
|||
<a href="pcre2serialize.html"><b>pcre2serialize</b></a>
|
||||
documentation.
|
||||
<a name="matchdatablock"></a></P>
|
||||
<br><a name="SEC27" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||
<br><a name="SEC26" href="#TOC1">THE MATCH DATA BLOCK</a><br>
|
||||
<P>
|
||||
<b>pcre2_match_data *pcre2_match_data_create(uint32_t <i>ovecsize</i>,</b>
|
||||
<b> pcre2_general_context *<i>gcontext</i>);</b>
|
||||
|
@ -2324,7 +2327,7 @@ match data block (for that match) have taken place.
|
|||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling <b>pcre2_match_data_free()</b>.
|
||||
</P>
|
||||
<br><a name="SEC28" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<br><a name="SEC27" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -2562,7 +2565,7 @@ examples, in the
|
|||
<a href="pcre2partial.html"><b>pcre2partial</b></a>
|
||||
documentation.
|
||||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||
<br><a name="SEC28" href="#TOC1">NEWLINE HANDLING WHEN MATCHING</a><br>
|
||||
<P>
|
||||
When PCRE2 is built, a default newline convention is set; this is usually the
|
||||
standard convention for the operating system. The default can be overridden in
|
||||
|
@ -2602,7 +2605,7 @@ does \s, even though it includes CR and LF in the characters that it matches.
|
|||
Notwithstanding the above, anomalous effects may still occur when CRLF is a
|
||||
valid newline sequence and explicit \r or \n escapes appear in the pattern.
|
||||
<a name="matchedstrings"></a></P>
|
||||
<br><a name="SEC30" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||
<br><a name="SEC29" href="#TOC1">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
<b>uint32_t pcre2_get_ovector_count(pcre2_match_data *<i>match_data</i>);</b>
|
||||
<br>
|
||||
|
@ -2701,7 +2704,7 @@ parentheses, no more than <i>ovector[0]</i> to <i>ovector[2n+1]</i> are set by
|
|||
<b>pcre2_match()</b>. The other elements retain whatever values they previously
|
||||
had.
|
||||
<a name="matchotherdata"></a></P>
|
||||
<br><a name="SEC31" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||
<br><a name="SEC30" href="#TOC1">OTHER INFORMATION ABOUT A MATCH</a><br>
|
||||
<P>
|
||||
<b>PCRE2_SPTR pcre2_get_mark(pcre2_match_data *<i>match_data</i>);</b>
|
||||
<br>
|
||||
|
@ -2751,7 +2754,7 @@ the code unit offset of the invalid UTF character. Details are given in the
|
|||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page.
|
||||
<a name="errorlist"></a></P>
|
||||
<br><a name="SEC32" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<br><a name="SEC31" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<P>
|
||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||
converted to a text string by calling the <b>pcre2_get_error_message()</b>
|
||||
|
@ -2857,7 +2860,7 @@ faulted at compile time, but more complicated cases, in particular mutual
|
|||
recursions between two different subpatterns, cannot be detected until matching
|
||||
is attempted.
|
||||
<a name="geterrormessage"></a></P>
|
||||
<br><a name="SEC33" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
|
||||
<br><a name="SEC32" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
|
||||
<P>
|
||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||
|
@ -2878,7 +2881,7 @@ returned. If the buffer is too small, the message is truncated (but still with
|
|||
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
|
||||
None of the messages are very long; a buffer size of 120 code units is ample.
|
||||
<a name="extractbynumber"></a></P>
|
||||
<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||
|
@ -2975,7 +2978,7 @@ The substring did not participate in the match. For example, if the pattern is
|
|||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||
capturing slots, substring number 1 is unset.
|
||||
</P>
|
||||
<br><a name="SEC35" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<br><a name="SEC34" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||
|
@ -3014,7 +3017,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
|||
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||
<a name="extractbyname"></a></P>
|
||||
<br><a name="SEC36" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<br><a name="SEC35" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||
|
@ -3074,7 +3077,7 @@ names are not included in the compiled code. The matching process uses only
|
|||
numbers. For this reason, the use of different names for subpatterns of the
|
||||
same number causes an error at compile time.
|
||||
</P>
|
||||
<br><a name="SEC37" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<br><a name="SEC36" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -3281,7 +3284,7 @@ obtained by calling the <b>pcre2_get_error_message()</b> function (see
|
|||
"Obtaining a textual error message"
|
||||
<a href="#geterrormessage">above).</a>
|
||||
</P>
|
||||
<br><a name="SEC38" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<br><a name="SEC37" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||
|
@ -3326,7 +3329,7 @@ in the section entitled <i>Information about a pattern</i>. Given all the
|
|||
relevant entries for the name, you can extract each of their numbers, and hence
|
||||
the captured data.
|
||||
</P>
|
||||
<br><a name="SEC39" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<br><a name="SEC38" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<P>
|
||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||
when it finds the first match at a given point in the subject. If you want to
|
||||
|
@ -3344,7 +3347,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
|||
other alternatives. Ultimately, when it runs out of matches,
|
||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||
<a name="dfamatch"></a></P>
|
||||
<br><a name="SEC40" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<br><a name="SEC39" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -3540,13 +3543,13 @@ some plausibility checks are made on the contents of the workspace, which
|
|||
should contain data about the previous partial match. If any of these checks
|
||||
fail, this error is given.
|
||||
</P>
|
||||
<br><a name="SEC41" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC40" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||
<b>pcre2sample</b>(3), <b>pcre2unicode</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC42" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC41" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -3555,9 +3558,9 @@ University Computing Service
|
|||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC43" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC42" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 10 July 2017
|
||||
Last updated: 17 September 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -512,8 +512,10 @@ limited to certain values, as follows:
|
|||
32-bit non-UTF mode no greater than 0xffffffff
|
||||
All UTF modes no greater than 0x10ffff and a valid codepoint
|
||||
</pre>
|
||||
Invalid Unicode codepoints are the range 0xd800 to 0xdfff (the so-called
|
||||
"surrogate" codepoints), and 0xffef.
|
||||
Invalid Unicode codepoints are all those in the range 0xd800 to 0xdfff (the
|
||||
so-called "surrogate" codepoints). The check for these can be disabled by the
|
||||
caller of <b>pcre2_compile()</b> by setting the option
|
||||
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES.
|
||||
</P>
|
||||
<br><b>
|
||||
Escape sequences in character classes
|
||||
|
@ -1413,7 +1415,11 @@ followed by two other characters. The octal or hexadecimal representation of
|
|||
Ranges normally include all code points between the start and end characters,
|
||||
inclusive. They can also be used for code points specified numerically, for
|
||||
example [\000-\037]. Ranges can include any characters that are valid for the
|
||||
current mode.
|
||||
current mode. In any UTF mode, the so-called "surrogate" characters (those
|
||||
whose code points lie between 0xd800 and 0xdfff inclusive) may not be specified
|
||||
explicitly by default (the PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES option disables
|
||||
this check). However, ranges such as [\x{d7ff}-\x{e000}], which include the
|
||||
surrogates, are always permitted.
|
||||
</P>
|
||||
<P>
|
||||
There is a special case in EBCDIC environments for ranges whose end points are
|
||||
|
@ -3473,7 +3479,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC30" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 29 July 2017
|
||||
Last updated: 12 September 2017
|
||||
<br>
|
||||
Copyright © 1997-2017 University of Cambridge.
|
||||
<br>
|
||||
|
|
3541
doc/pcre2.txt
3541
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "16 September 2017" "PCRE2 10.31"
|
||||
.TH PCRE2API 3 "17 September 2017" "PCRE2 10.31"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1018,7 +1018,7 @@ default can be overridden when a pattern is compiled.
|
|||
PCRE2_CONFIG_COMPILED_WIDTHS
|
||||
.sp
|
||||
The output is a uint32_t integer whose lower bits indicate which code unit
|
||||
widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support,
|
||||
widths were selected when PCRE2 was built. The 1-bit indicates 8-bit support,
|
||||
and the 2-bit and 4-bit indicate 16-bit and 32-bit support, respectively.
|
||||
.sp
|
||||
PCRE2_CONFIG_DEPTHLIMIT
|
||||
|
@ -1088,8 +1088,8 @@ operating system.
|
|||
.sp
|
||||
PCRE2_CONFIG_NEVER_BACKSLASH_C
|
||||
.sp
|
||||
The output is a uint32_t integer that is set to one if the use of \eC was
|
||||
permanently disabled when PCRE2 was built; otherwise it is set to zero.
|
||||
The output is a uint32_t integer that is set to one if the use of \eC was
|
||||
permanently disabled when PCRE2 was built; otherwise it is set to zero.
|
||||
.sp
|
||||
PCRE2_CONFIG_PARENSLIMIT
|
||||
.sp
|
||||
|
@ -1147,8 +1147,8 @@ zero.
|
|||
.fi
|
||||
.P
|
||||
The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
|
||||
The pattern is defined by a pointer to a string of code units and a length. If
|
||||
the pattern is zero-terminated, the length can be specified as
|
||||
The pattern is defined by a pointer to a string of code units and a length (in
|
||||
code units). If the pattern is zero-terminated, the length can be specified as
|
||||
PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
|
||||
contains the compiled pattern and related data, or NULL if an error occurred.
|
||||
.P
|
||||
|
@ -1226,22 +1226,30 @@ respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
|
|||
error has occurred. The values are not defined when compilation is successful
|
||||
and \fBpcre2_compile()\fP returns a non-NULL value.
|
||||
.P
|
||||
The value returned in \fIerroroffset\fP is an indication of where in the
|
||||
pattern the error occurred. It is not necessarily the furthest point in the
|
||||
pattern that was read. For example, after the error "lookbehind assertion is
|
||||
not fixed length", the error offset points to the start of the failing
|
||||
assertion.
|
||||
.P
|
||||
The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual error
|
||||
There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return
|
||||
if it finds an error in the pattern. There are also some negative error codes
|
||||
that are used for invalid UTF strings. These are the same as given by
|
||||
\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described in the
|
||||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
page. There is no separate documentation for the positive error codes, because
|
||||
the textual error messages that are obtained by calling the
|
||||
\fBpcre2_get_error_message()\fP function (see "Obtaining a textual error
|
||||
message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
below)
|
||||
.\"
|
||||
provides a textual message for each error code. Compilation errors have
|
||||
positive error codes; UTF formatting error codes are negative. For an invalid
|
||||
UTF-8 or UTF-16 string, the offset is that of the first code unit of the
|
||||
failing character.
|
||||
should be self-explanatory. Macro names starting with PCRE2_ERROR_ are defined
|
||||
for both positive and negative error codes in \fBpcre2.h\fP.
|
||||
.P
|
||||
The value returned in \fIerroroffset\fP is an indication of where in the
|
||||
pattern the error occurred. It is not necessarily the furthest point in the
|
||||
pattern that was read. For example, after the error "lookbehind assertion is
|
||||
not fixed length", the error offset points to the start of the failing
|
||||
assertion. For an invalid UTF-8 or UTF-16 string, the offset is that of the
|
||||
first code unit of the failing character.
|
||||
.P
|
||||
Some errors are not detected until the whole pattern has been scanned; in these
|
||||
cases, the offset passed back is the length of the pattern. Note that the
|
||||
|
@ -1757,26 +1765,6 @@ used with PCRE2_LITERAL. However, it is ignored if PCRE2_EXTRA_MATCH_LINE is
|
|||
also set.
|
||||
.
|
||||
.
|
||||
.SH "COMPILATION ERROR CODES"
|
||||
.rs
|
||||
.sp
|
||||
There are nearly 100 positive error codes that \fBpcre2_compile()\fP may return
|
||||
(via \fIerrorcode\fP) if it finds an error in the pattern. There are also some
|
||||
negative error codes that are used for invalid UTF strings. These are the same
|
||||
as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described
|
||||
in the
|
||||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
page. The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual
|
||||
error message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
below)
|
||||
.\"
|
||||
can be called to obtain a textual error message from any error code.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="jitcompiling"></a>
|
||||
.SH "JUST-IN-TIME (JIT) COMPILATION"
|
||||
.rs
|
||||
|
@ -3585,6 +3573,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 16 September 2017
|
||||
Last updated: 17 September 2017
|
||||
Copyright (c) 1997-2017 University of Cambridge.
|
||||
.fi
|
||||
|
|
107
src/pcre2.h
107
src/pcre2.h
|
@ -208,7 +208,104 @@ greater than zero. */
|
|||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
@ -248,10 +345,10 @@ greater than zero. */
|
|||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
|
||||
functions, and serializing functions. They are in numerical order. Originally
|
||||
they were in alphabetical order too, but now that PCRE2 is released, the
|
||||
numbers must not be changed. */
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
|
|
107
src/pcre2.h.in
107
src/pcre2.h.in
|
@ -208,7 +208,104 @@ greater than zero. */
|
|||
#define PCRE2_BSR_UNICODE 1
|
||||
#define PCRE2_BSR_ANYCRLF 2
|
||||
|
||||
/* Error codes: no match and partial match are "expected" errors. */
|
||||
/* Error codes for pcre2_compile(). Some of these are also used by
|
||||
pcre2_pattern_convert(). */
|
||||
|
||||
#define PCRE2_ERROR_END_BACKSLASH 101
|
||||
#define PCRE2_ERROR_END_BACKSLASH_C 102
|
||||
#define PCRE2_ERROR_UNKNOWN_ESCAPE 103
|
||||
#define PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER 104
|
||||
#define PCRE2_ERROR_QUANTIFIER_TOO_BIG 105
|
||||
#define PCRE2_ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS 107
|
||||
#define PCRE2_ERROR_CLASS_RANGE_ORDER 108
|
||||
#define PCRE2_ERROR_QUANTIFIER_INVALID 109
|
||||
#define PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT 110
|
||||
#define PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY 111
|
||||
#define PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS 112
|
||||
#define PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING 113
|
||||
#define PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE 115
|
||||
#define PCRE2_ERROR_NULL_PATTERN 116
|
||||
#define PCRE2_ERROR_BAD_OPTIONS 117
|
||||
#define PCRE2_ERROR_MISSING_COMMENT_CLOSING 118
|
||||
#define PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP 119
|
||||
#define PCRE2_ERROR_PATTERN_TOO_LARGE 120
|
||||
#define PCRE2_ERROR_HEAP_FAILED 121
|
||||
#define PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS 122
|
||||
#define PCRE2_ERROR_INTERNAL_CODE_OVERFLOW 123
|
||||
#define PCRE2_ERROR_MISSING_CONDITION_CLOSING 124
|
||||
#define PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH 125
|
||||
#define PCRE2_ERROR_ZERO_RELATIVE_REFERENCE 126
|
||||
#define PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES 127
|
||||
#define PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED 128
|
||||
#define PCRE2_ERROR_BAD_RELATIVE_REFERENCE 129
|
||||
#define PCRE2_ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define PCRE2_ERROR_INTERNAL_STUDY_ERROR 131
|
||||
#define PCRE2_ERROR_UNICODE_NOT_SUPPORTED 132
|
||||
#define PCRE2_ERROR_PARENTHESES_STACK_CHECK 133
|
||||
#define PCRE2_ERROR_CODE_POINT_TOO_BIG 134
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED 135
|
||||
#define PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C 136
|
||||
#define PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE 137
|
||||
#define PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG 138
|
||||
#define PCRE2_ERROR_MISSING_CALLOUT_CLOSING 139
|
||||
#define PCRE2_ERROR_ESCAPE_INVALID_IN_VERB 140
|
||||
#define PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P 141
|
||||
#define PCRE2_ERROR_MISSING_NAME_TERMINATOR 142
|
||||
#define PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME 143
|
||||
#define PCRE2_ERROR_INVALID_SUBPATTERN_NAME 144
|
||||
#define PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE 145
|
||||
#define PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY 146
|
||||
#define PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY 147
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG 148
|
||||
#define PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS 149
|
||||
#define PCRE2_ERROR_CLASS_INVALID_RANGE 150
|
||||
#define PCRE2_ERROR_OCTAL_BYTE_TOO_BIG 151
|
||||
#define PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE 152
|
||||
#define PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN 153
|
||||
#define PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES 154
|
||||
#define PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE 155
|
||||
#define PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE 156
|
||||
#define PCRE2_ERROR_BACKSLASH_G_SYNTAX 157
|
||||
#define PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING 158
|
||||
#define PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED 159
|
||||
#define PCRE2_ERROR_VERB_UNKNOWN 160
|
||||
#define PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG 161
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED 162
|
||||
#define PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW 163
|
||||
#define PCRE2_ERROR_INVALID_OCTAL 164
|
||||
#define PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH 165
|
||||
#define PCRE2_ERROR_MARK_MISSING_ARGUMENT 166
|
||||
#define PCRE2_ERROR_INVALID_HEXADECIMAL 167
|
||||
#define PCRE2_ERROR_BACKSLASH_C_SYNTAX 168
|
||||
#define PCRE2_ERROR_BACKSLASH_K_SYNTAX 169
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS 170
|
||||
#define PCRE2_ERROR_BACKSLASH_N_IN_CLASS 171
|
||||
#define PCRE2_ERROR_CALLOUT_STRING_TOO_LONG 172
|
||||
#define PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT 173
|
||||
#define PCRE2_ERROR_UTF_IS_DISABLED 174
|
||||
#define PCRE2_ERROR_UCP_IS_DISABLED 175
|
||||
#define PCRE2_ERROR_VERB_NAME_TOO_LONG 176
|
||||
#define PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG 177
|
||||
#define PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS 178
|
||||
#define PCRE2_ERROR_VERSION_CONDITION_SYNTAX 179
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS 180
|
||||
#define PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER 181
|
||||
#define PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER 182
|
||||
#define PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED 183
|
||||
#define PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP 184
|
||||
#define PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED 185
|
||||
#define PCRE2_ERROR_PATTERN_TOO_COMPLICATED 186
|
||||
#define PCRE2_ERROR_LOOKBEHIND_TOO_LONG 187
|
||||
#define PCRE2_ERROR_PATTERN_STRING_TOO_LONG 188
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE 189
|
||||
#define PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP 190
|
||||
#define PCRE2_ERROR_NO_SURROGATES_IN_UTF16 191
|
||||
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
|
||||
|
||||
|
||||
/* "Expected" matching error codes: no match and partial match. */
|
||||
|
||||
#define PCRE2_ERROR_NOMATCH (-1)
|
||||
#define PCRE2_ERROR_PARTIAL (-2)
|
||||
|
@ -248,10 +345,10 @@ greater than zero. */
|
|||
#define PCRE2_ERROR_UTF32_ERR1 (-27)
|
||||
#define PCRE2_ERROR_UTF32_ERR2 (-28)
|
||||
|
||||
/* Error codes for pcre2[_dfa]_match(), substring extraction functions, context
|
||||
functions, and serializing functions. They are in numerical order. Originally
|
||||
they were in alphabetical order too, but now that PCRE2 is released, the
|
||||
numbers must not be changed. */
|
||||
/* Miscellaneous error codes for pcre2[_dfa]_match(), substring extraction
|
||||
functions, context functions, and serializing functions. They are in numerical
|
||||
order. Originally they were in alphabetical order too, but now that PCRE2 is
|
||||
released, the numbers must not be changed. */
|
||||
|
||||
#define PCRE2_ERROR_BADDATA (-29)
|
||||
#define PCRE2_ERROR_MIXEDTABLES (-30) /* Name was changed */
|
||||
|
|
|
@ -55,16 +55,6 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define DUMMY_BUFFER_SIZE 100
|
||||
|
||||
/* Some pcre2_compile() error numbers are used herein. */
|
||||
|
||||
/* Note: ERROR_NO_SLASH_Z is not an error code. */
|
||||
#define ERROR_NO_SLASH_Z 100
|
||||
#define ERROR_END_BACKSLASH 101
|
||||
#define ERROR_MISSING_SQUARE_BRACKET 106
|
||||
#define ERROR_MISSING_CLOSING_PARENTHESIS 114
|
||||
#define ERROR_UNKNOWN_POSIX_CLASS 130
|
||||
#define ERROR_NO_UNICODE 132
|
||||
|
||||
/* Generated pattern fragments */
|
||||
|
||||
#define STR_BACKSLASH_A STR_BACKSLASH STR_A
|
||||
|
@ -286,7 +276,7 @@ while (plength > 0)
|
|||
break;
|
||||
|
||||
case CHAR_BACKSLASH:
|
||||
if (plength <= 0) return ERROR_END_BACKSLASH;
|
||||
if (plength <= 0) return PCRE2_ERROR_END_BACKSLASH;
|
||||
if (extended) nextisliteral = TRUE; else
|
||||
{
|
||||
if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
|
||||
|
@ -362,7 +352,7 @@ while (plength > 0)
|
|||
}
|
||||
|
||||
if (posix_state >= POSIX_CLASS_NOT_STARTED)
|
||||
return ERROR_MISSING_SQUARE_BRACKET;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
convlength += p - pp; /* Final segment */
|
||||
*bufflenptr = convlength;
|
||||
*p++ = 0;
|
||||
|
@ -601,7 +591,7 @@ int len, class_index;
|
|||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return ERROR_MISSING_SQUARE_BRACKET;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
if (*pattern == CHAR_EXCLAMATION_MARK
|
||||
|
@ -612,7 +602,7 @@ if (*pattern == CHAR_EXCLAMATION_MARK
|
|||
if (pattern >= pattern_end)
|
||||
{
|
||||
*from = pattern;
|
||||
return ERROR_MISSING_SQUARE_BRACKET;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
is_negative = TRUE;
|
||||
|
@ -750,7 +740,7 @@ while (pattern < pattern_end)
|
|||
}
|
||||
|
||||
*from = pattern;
|
||||
return ERROR_MISSING_SQUARE_BRACKET;
|
||||
return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
|
||||
}
|
||||
|
||||
|
||||
|
@ -808,8 +798,9 @@ BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
|
|||
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
|
||||
BOOL in_atomic = FALSE;
|
||||
BOOL after_starstar = FALSE;
|
||||
BOOL no_slash_z = FALSE;
|
||||
BOOL with_escape, is_start, after_separator;
|
||||
int result;
|
||||
int result = 0;
|
||||
|
||||
(void)utf; /* Avoid compiler warning. */
|
||||
|
||||
|
@ -853,8 +844,6 @@ if (is_start)
|
|||
convert_glob_write_str(&out, 2);
|
||||
}
|
||||
|
||||
result = 0;
|
||||
|
||||
while (pattern < pattern_end)
|
||||
{
|
||||
c = *pattern++;
|
||||
|
@ -878,7 +867,7 @@ while (pattern < pattern_end)
|
|||
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
result = ERROR_NO_SLASH_Z;
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -948,7 +937,7 @@ while (pattern < pattern_end)
|
|||
{
|
||||
if (pattern >= pattern_end)
|
||||
{
|
||||
result = ERROR_NO_SLASH_Z;
|
||||
no_slash_z = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1016,9 +1005,9 @@ while (pattern < pattern_end)
|
|||
convert_glob_write(&out, c);
|
||||
}
|
||||
|
||||
if (result == 0 || result == ERROR_NO_SLASH_Z)
|
||||
if (result == 0)
|
||||
{
|
||||
if (result == 0)
|
||||
if (!no_slash_z)
|
||||
{
|
||||
out.out_str[0] = CHAR_BACKSLASH;
|
||||
out.out_str[1] = CHAR_z;
|
||||
|
@ -1029,7 +1018,6 @@ if (result == 0 || result == ERROR_NO_SLASH_Z)
|
|||
convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
|
||||
|
||||
convert_glob_write(&out, CHAR_NUL);
|
||||
result = 0;
|
||||
|
||||
if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
|
||||
result = PCRE2_ERROR_NOMEMORY;
|
||||
|
@ -1093,7 +1081,7 @@ if (ccontext == NULL) ccontext =
|
|||
/* Check UTF if required. */
|
||||
|
||||
#ifndef SUPPORT_UNICODE
|
||||
if (utf) return ERROR_NO_UNICODE;
|
||||
if (utf) return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
|
||||
#else
|
||||
if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue