Return an error code when pcre2_get_error_message() does not recognize an error
code, and add a pcre2test facility for testing this.
This commit is contained in:
parent
16acce6555
commit
0a29ecbe02
|
@ -136,6 +136,13 @@ RunTest (see 4 above).
|
|||
|
||||
35. Fix potential negative index in pcre2test.
|
||||
|
||||
36. Calls to pcre2_get_error_message() with error numbers that are never
|
||||
returned by PCRE2 functions were returning empty strings. Now the error code
|
||||
PCRE2_ERROR_BADDATA is returned. A facility has been added to pcre2test to
|
||||
show the texts for given error numbers (i.e. to call pcre2_get_error_message()
|
||||
and display what it returns) and a few representative error codes are now
|
||||
checked in RunTest.
|
||||
|
||||
|
||||
Version 10.21 12-January-2016
|
||||
-----------------------------
|
||||
|
|
1
RunTest
1
RunTest
|
@ -499,6 +499,7 @@ for bmode in "$test8" "$test16" "$test32"; do
|
|||
for opt in "" $jitopt; do
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $test2stack $bmode $opt $testdata/testinput2 testtry
|
||||
if [ $? = 0 ] ; then
|
||||
$sim $valgrind ${opt:+$vjs} ./pcre2test -q $bmode $opt -error -63,-62,-2,-1,0,100,188,189 >>testtry
|
||||
checkresult $? 2 "$opt"
|
||||
else
|
||||
echo " "
|
||||
|
|
|
@ -35,7 +35,10 @@ errors are negative numbers. The arguments are:
|
|||
<i>bufflen</i> the length of the buffer (code units)
|
||||
</pre>
|
||||
The function returns the length of the message, excluding the trailing zero, or
|
||||
a negative error code if the buffer is too small.
|
||||
the negative error code PCRE2_ERROR_NOMEMORY if the buffer is too small. In
|
||||
this case, the returned message is truncated (but still with a trailing zero).
|
||||
If <i>errorcode</i> does not contain a recognized error code number, the
|
||||
negative value PCRE2_ERROR_BADDATA is returned.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -43,16 +43,17 @@ please consult the man page, in case the conversion went wrong.
|
|||
<li><a name="TOC28" href="#SEC28">HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC29" href="#SEC29">OTHER INFORMATION ABOUT A MATCH</a>
|
||||
<li><a name="TOC30" href="#SEC30">ERROR RETURNS FROM <b>pcre2_match()</b></a>
|
||||
<li><a name="TOC31" href="#SEC31">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC32" href="#SEC32">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC33" href="#SEC33">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC34" href="#SEC34">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC35" href="#SEC35">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC36" href="#SEC36">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC37" href="#SEC37">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC38" href="#SEC38">SEE ALSO</a>
|
||||
<li><a name="TOC39" href="#SEC39">AUTHOR</a>
|
||||
<li><a name="TOC40" href="#SEC40">REVISION</a>
|
||||
<li><a name="TOC31" href="#SEC31">OBTAINING A TEXTUAL ERROR MESSAGE</a>
|
||||
<li><a name="TOC32" href="#SEC32">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
|
||||
<li><a name="TOC33" href="#SEC33">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a>
|
||||
<li><a name="TOC34" href="#SEC34">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
|
||||
<li><a name="TOC35" href="#SEC35">CREATING A NEW STRING WITH SUBSTITUTIONS</a>
|
||||
<li><a name="TOC36" href="#SEC36">DUPLICATE SUBPATTERN NAMES</a>
|
||||
<li><a name="TOC37" href="#SEC37">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a>
|
||||
<li><a name="TOC38" href="#SEC38">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
|
||||
<li><a name="TOC39" href="#SEC39">SEE ALSO</a>
|
||||
<li><a name="TOC40" href="#SEC40">AUTHOR</a>
|
||||
<li><a name="TOC41" href="#SEC41">REVISION</a>
|
||||
</ul>
|
||||
<P>
|
||||
<b>#include &lt;pcre2.h&gt;</b>
|
||||
|
@ -1063,7 +1064,7 @@ The <b>pcre2_compile()</b> function compiles a pattern into an internal form.
|
|||
The pattern is defined by a pointer to a string of code units and a length. If
|
||||
the pattern is zero-terminated, the length can be specified as
|
||||
PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
|
||||
contains the compiled pattern and related data.
|
||||
contains the compiled pattern and related data, or NULL if an error occurred.
|
||||
</P>
|
||||
<P>
|
||||
If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
||||
|
@ -1085,8 +1086,9 @@ to acquire a private copy of shared compiled code.
|
|||
<P>
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that they can
|
||||
be referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern (or a subject string) until after all operations on the
|
||||
be referenced by the substring extraction functions. After running a match, you
|
||||
must not free a compiled pattern (or a subject string) until after all
|
||||
operations on the
|
||||
<a href="#matchdatablock">match data block</a>
|
||||
have taken place.
|
||||
</P>
|
||||
|
@ -1113,13 +1115,20 @@ newline setting) can be provided in a compile context (as described
|
|||
</P>
|
||||
<P>
|
||||
If <i>errorcode</i> or <i>erroroffset</i> is NULL, <b>pcre2_compile()</b> returns
|
||||
NULL immediately. Otherwise, if compilation of a pattern fails,
|
||||
<b>pcre2_compile()</b> returns NULL, having set these variables to an error code
|
||||
and an offset (number of code units) within the pattern, respectively. The
|
||||
<b>pcre2_get_error_message()</b> function provides a textual message for each
|
||||
error code. Compilation errors are positive numbers, but UTF formatting errors
|
||||
are negative numbers. For an invalid UTF-8 or UTF-16 string, the offset is that
|
||||
of the first code unit of the failing character.
|
||||
NULL immediately. Otherwise, the variables to which these point are set to an
|
||||
error code and an offset (number of code units) within the pattern,
|
||||
respectively, when <b>pcre2_compile()</b> returns NULL because a compilation
|
||||
error has occurred. The values are not defined when compilation is successful
|
||||
and <b>pcre2_compile()</b> returns a non-NULL value.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual error
|
||||
message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
provides a textual message for each error code. Compilation errors have
|
||||
positive error codes; UTF formatting error codes are negative. For an invalid
|
||||
UTF-8 or UTF-16 string, the offset is that of the first code unit of the
|
||||
failing character.
|
||||
</P>
|
||||
<P>
|
||||
Some errors are not detected until the whole pattern has been scanned; in these
|
||||
|
@ -1488,13 +1497,16 @@ page.
|
|||
</P>
|
||||
<br><a name="SEC19" href="#TOC1">COMPILATION ERROR CODES</a><br>
|
||||
<P>
|
||||
There are over 80 positive error codes that <b>pcre2_compile()</b> may return if
|
||||
it finds an error in the pattern. There are also some negative error codes that
|
||||
are used for invalid UTF strings. These are the same as given by
|
||||
<b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and are described in the
|
||||
There are over 80 positive error codes that <b>pcre2_compile()</b> may return
|
||||
(via <i>errorcode</i>) if it finds an error in the pattern. There are also some
|
||||
negative error codes that are used for invalid UTF strings. These are the same
|
||||
as given by <b>pcre2_match()</b> and <b>pcre2_dfa_match()</b>, and are described
|
||||
in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page. The <b>pcre2_get_error_message()</b> function can be called to obtain a
|
||||
textual error message from any error code.
|
||||
page. The <b>pcre2_get_error_message()</b> function (see "Obtaining a textual
|
||||
error message"
|
||||
<a href="#geterrormessage">below)</a>
|
||||
can be called to obtain a textual error message from any error code.
|
||||
<a name="jitcompiling"></a></P>
|
||||
<br><a name="SEC20" href="#TOC1">JUST-IN-TIME (JIT) COMPILATION</a><br>
|
||||
<P>
|
||||
|
@ -2416,11 +2428,13 @@ page.
|
|||
<br><a name="SEC30" href="#TOC1">ERROR RETURNS FROM <b>pcre2_match()</b></a><br>
|
||||
<P>
|
||||
If <b>pcre2_match()</b> fails, it returns a negative number. This can be
|
||||
converted to a text string by calling <b>pcre2_get_error_message()</b>. Negative
|
||||
error codes are also returned by other functions, and are documented with them.
|
||||
The codes are given names in the header file. If UTF checking is in force and
|
||||
an invalid UTF subject string is detected, one of a number of UTF-specific
|
||||
negative error codes is returned. Details are given in the
|
||||
converted to a text string by calling the <b>pcre2_get_error_message()</b>
|
||||
function (see "Obtaining a textual error message"
|
||||
<a href="#geterrormessage">below).</a>
|
||||
Negative error codes are also returned by other functions, and are documented
|
||||
with them. The codes are given names in the header file. If UTF checking is in
|
||||
force and an invalid UTF subject string is detected, one of a number of
|
||||
UTF-specific negative error codes is returned. Details are given in the
|
||||
<a href="pcre2unicode.html"><b>pcre2unicode</b></a>
|
||||
page. The following are the other errors that may be returned by
|
||||
<b>pcre2_match()</b>:
|
||||
|
@ -2521,8 +2535,29 @@ is attempted.
|
|||
PCRE2_ERROR_RECURSIONLIMIT
|
||||
</pre>
|
||||
The internal recursion limit was reached.
|
||||
<a name="geterrormessage"></a></P>
|
||||
<br><a name="SEC31" href="#TOC1">OBTAINING A TEXTUAL ERROR MESSAGE</a><br>
|
||||
<P>
|
||||
<b>int pcre2_get_error_message(int <i>errorcode</i>, PCRE2_UCHAR *<i>buffer</i>,</b>
|
||||
<b> PCRE2_SIZE <i>bufflen</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
A text message for an error code from any PCRE2 function (compile, match, or
|
||||
auxiliary) can be obtained by calling <b>pcre2_get_error_message()</b>. The code
|
||||
is passed as the first argument, with the remaining two arguments specifying a
|
||||
code unit buffer and its length, into which the text message is placed. Note
|
||||
that the message is returned in code units of the appropriate width for the
|
||||
library that is being used.
|
||||
</P>
|
||||
<P>
|
||||
The returned message is terminated with a trailing zero, and the function
|
||||
returns the number of code units used, excluding the trailing zero. If the
|
||||
error number is unknown, the negative error code PCRE2_ERROR_BADDATA is
|
||||
returned. If the buffer is too small, the message is truncated (but still with
|
||||
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
|
||||
None of the messages are very long; a buffer size of 120 code units is ample.
|
||||
<a name="extractbynumber"></a></P>
|
||||
<br><a name="SEC31" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<br><a name="SEC32" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_length_bynumber(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> uint32_t <i>number</i>, PCRE2_SIZE *<i>length</i>);</b>
|
||||
|
@ -2619,7 +2654,7 @@ The substring did not participate in the match. For example, if the pattern is
|
|||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||
capturing slots, substring number 1 is unset.
|
||||
</P>
|
||||
<br><a name="SEC32" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<br><a name="SEC33" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_list_get(pcre2_match_data *<i>match_data</i>,</b>
|
||||
<b> PCRE2_UCHAR ***<i>listptr</i>, PCRE2_SIZE **<i>lengthsptr</i>);</b>
|
||||
|
@ -2658,7 +2693,7 @@ can be distinguished from a genuine zero-length substring by inspecting the
|
|||
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||
<a name="extractbyname"></a></P>
|
||||
<br><a name="SEC33" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<br><a name="SEC34" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_number_from_name(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>);</b>
|
||||
|
@ -2718,7 +2753,7 @@ names are not included in the compiled code. The matching process uses only
|
|||
numbers. For this reason, the use of different names for subpatterns of the
|
||||
same number causes an error at compile time.
|
||||
</P>
|
||||
<br><a name="SEC34" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<br><a name="SEC35" href="#TOC1">CREATING A NEW STRING WITH SUBSTITUTIONS</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substitute(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -2921,9 +2956,11 @@ started, which can happen if \K is used in an assertion).
|
|||
</P>
|
||||
<P>
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
obtained by calling <b>pcre2_get_error_message()</b>.
|
||||
obtained by calling the <b>pcre2_get_error_message()</b> function (see
|
||||
"Obtaining a textual error message"
|
||||
<a href="#geterrormessage">above).</a>
|
||||
</P>
|
||||
<br><a name="SEC35" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<br><a name="SEC36" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
|
||||
<P>
|
||||
<b>int pcre2_substring_nametable_scan(const pcre2_code *<i>code</i>,</b>
|
||||
<b> PCRE2_SPTR <i>name</i>, PCRE2_SPTR *<i>first</i>, PCRE2_SPTR *<i>last</i>);</b>
|
||||
|
@ -2968,7 +3005,7 @@ in the section entitled <i>Information about a pattern</i>. Given all the
|
|||
relevant entries for the name, you can extract each of their numbers, and hence
|
||||
the captured data.
|
||||
</P>
|
||||
<br><a name="SEC36" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<br><a name="SEC37" href="#TOC1">FINDING ALL POSSIBLE MATCHES AT ONE POSITION</a><br>
|
||||
<P>
|
||||
The traditional matching function uses a similar algorithm to Perl, which stops
|
||||
when it finds the first match at a given point in the subject. If you want to
|
||||
|
@ -2986,7 +3023,7 @@ substring. Then return 1, which forces <b>pcre2_match()</b> to backtrack and try
|
|||
other alternatives. Ultimately, when it runs out of matches,
|
||||
<b>pcre2_match()</b> will yield PCRE2_ERROR_NOMATCH.
|
||||
<a name="dfamatch"></a></P>
|
||||
<br><a name="SEC37" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<br><a name="SEC38" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_dfa_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
<b> PCRE2_SIZE <i>length</i>, PCRE2_SIZE <i>startoffset</i>,</b>
|
||||
|
@ -3181,13 +3218,13 @@ some plausibility checks are made on the contents of the workspace, which
|
|||
should contain data about the previous partial match. If any of these checks
|
||||
fail, this error is given.
|
||||
</P>
|
||||
<br><a name="SEC38" href="#TOC1">SEE ALSO</a><br>
|
||||
<br><a name="SEC39" href="#TOC1">SEE ALSO</a><br>
|
||||
<P>
|
||||
<b>pcre2build</b>(3), <b>pcre2callout</b>(3), <b>pcre2demo</b>(3),
|
||||
<b>pcre2matching</b>(3), <b>pcre2partial</b>(3), <b>pcre2posix</b>(3),
|
||||
<b>pcre2sample</b>(3), <b>pcre2stack</b>(3), <b>pcre2unicode</b>(3).
|
||||
</P>
|
||||
<br><a name="SEC39" href="#TOC1">AUTHOR</a><br>
|
||||
<br><a name="SEC40" href="#TOC1">AUTHOR</a><br>
|
||||
<P>
|
||||
Philip Hazel
|
||||
<br>
|
||||
|
@ -3196,9 +3233,9 @@ University Computing Service
|
|||
Cambridge, England.
|
||||
<br>
|
||||
</P>
|
||||
<br><a name="SEC40" href="#TOC1">REVISION</a><br>
|
||||
<br><a name="SEC41" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -179,6 +179,13 @@ using the <b>pcre2_dfa_match()</b> function instead of the default
|
|||
<b>pcre2_match()</b>.
|
||||
</P>
|
||||
<P>
|
||||
<b>-error</b> <i>number[,number,...]</i>
|
||||
Call <b>pcre2_get_error_message()</b> for each of the error numbers in the
|
||||
comma-separated list, display the resulting messages on the standard output,
|
||||
then exit with zero exit code. The numbers may be positive or negative. This is
|
||||
a convenience facility for PCRE2 maintainers.
|
||||
</P>
|
||||
<P>
|
||||
<b>-help</b>
|
||||
Output a brief summary of these options and then exit.
|
||||
</P>
|
||||
|
@ -1698,7 +1705,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC21" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
<br>
|
||||
Copyright © 1997-2016 University of Cambridge.
|
||||
<br>
|
||||
|
|
|
@ -1106,7 +1106,8 @@ COMPILING A PATTERN
|
|||
The pattern is defined by a pointer to a string of code units and a
|
||||
length. If the pattern is zero-terminated, the length can be specified
|
||||
as PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of
|
||||
memory that contains the compiled pattern and related data.
|
||||
memory that contains the compiled pattern and related data, or NULL if
|
||||
an error occurred.
|
||||
|
||||
If the compile context argument ccontext is NULL, memory for the com-
|
||||
piled pattern is obtained by calling malloc(). Otherwise, it is
|
||||
|
@ -1126,10 +1127,10 @@ COMPILING A PATTERN
|
|||
|
||||
NOTE: When one of the matching functions is called, pointers to the
|
||||
compiled pattern and the subject string are set in the match data block
|
||||
so that they can be referenced by the extraction functions. After run-
|
||||
ning a match, you must not free a compiled pattern (or a subject
|
||||
string) until after all operations on the match data block have taken
|
||||
place.
|
||||
so that they can be referenced by the substring extraction functions.
|
||||
After running a match, you must not free a compiled pattern (or a sub-
|
||||
ject string) until after all operations on the match data block have
|
||||
taken place.
|
||||
|
||||
The options argument for pcre2_compile() contains various bit settings
|
||||
that affect the compilation. It should be zero if no options are
|
||||
|
@ -1148,13 +1149,17 @@ COMPILING A PATTERN
|
|||
above).
|
||||
|
||||
If errorcode or erroroffset is NULL, pcre2_compile() returns NULL imme-
|
||||
diately. Otherwise, if compilation of a pattern fails, pcre2_compile()
|
||||
returns NULL, having set these variables to an error code and an offset
|
||||
(number of code units) within the pattern, respectively. The
|
||||
pcre2_get_error_message() function provides a textual message for each
|
||||
error code. Compilation errors are positive numbers, but UTF formatting
|
||||
errors are negative numbers. For an invalid UTF-8 or UTF-16 string, the
|
||||
offset is that of the first code unit of the failing character.
|
||||
diately. Otherwise, the variables to which these point are set to an
|
||||
error code and an offset (number of code units) within the pattern,
|
||||
respectively, when pcre2_compile() returns NULL because a compilation
|
||||
error has occurred. The values are not defined when compilation is suc-
|
||||
cessful and pcre2_compile() returns a non-NULL value.
|
||||
|
||||
The pcre2_get_error_message() function (see "Obtaining a textual error
|
||||
message" below) provides a textual message for each error code. Compi-
|
||||
lation errors have positive error codes; UTF formatting error codes are
|
||||
negative. For an invalid UTF-8 or UTF-16 string, the offset is that of
|
||||
the first code unit of the failing character.
|
||||
|
||||
Some errors are not detected until the whole pattern has been scanned;
|
||||
in these cases, the offset passed back is the length of the pattern.
|
||||
|
@ -1515,11 +1520,12 @@ COMPILING A PATTERN
|
|||
COMPILATION ERROR CODES
|
||||
|
||||
There are over 80 positive error codes that pcre2_compile() may return
|
||||
if it finds an error in the pattern. There are also some negative error
|
||||
codes that are used for invalid UTF strings. These are the same as
|
||||
given by pcre2_match() and pcre2_dfa_match(), and are described in the
|
||||
pcre2unicode page. The pcre2_get_error_message() function can be called
|
||||
to obtain a textual error message from any error code.
|
||||
(via errorcode) if it finds an error in the pattern. There are also
|
||||
some negative error codes that are used for invalid UTF strings. These
|
||||
are the same as given by pcre2_match() and pcre2_dfa_match(), and are
|
||||
described in the pcre2unicode page. The pcre2_get_error_message() func-
|
||||
tion (see "Obtaining a textual error message" below) can be called to
|
||||
obtain a textual error message from any error code.
|
||||
|
||||
|
||||
JUST-IN-TIME (JIT) COMPILATION
|
||||
|
@ -2389,13 +2395,14 @@ OTHER INFORMATION ABOUT A MATCH
|
|||
ERROR RETURNS FROM pcre2_match()
|
||||
|
||||
If pcre2_match() fails, it returns a negative number. This can be con-
|
||||
verted to a text string by calling pcre2_get_error_message(). Negative
|
||||
error codes are also returned by other functions, and are documented
|
||||
with them. The codes are given names in the header file. If UTF check-
|
||||
ing is in force and an invalid UTF subject string is detected, one of a
|
||||
number of UTF-specific negative error codes is returned. Details are
|
||||
given in the pcre2unicode page. The following are the other errors that
|
||||
may be returned by pcre2_match():
|
||||
verted to a text string by calling the pcre2_get_error_message() func-
|
||||
tion (see "Obtaining a textual error message" below). Negative error
|
||||
codes are also returned by other functions, and are documented with
|
||||
them. The codes are given names in the header file. If UTF checking is
|
||||
in force and an invalid UTF subject string is detected, one of a number
|
||||
of UTF-specific negative error codes is returned. Details are given in
|
||||
the pcre2unicode page. The following are the other errors that may be
|
||||
returned by pcre2_match():
|
||||
|
||||
PCRE2_ERROR_NOMATCH
|
||||
|
||||
|
@ -2491,6 +2498,27 @@ ERROR RETURNS FROM pcre2_match()
|
|||
The internal recursion limit was reached.
|
||||
|
||||
|
||||
OBTAINING A TEXTUAL ERROR MESSAGE
|
||||
|
||||
int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer,
|
||||
PCRE2_SIZE bufflen);
|
||||
|
||||
A text message for an error code from any PCRE2 function (compile,
|
||||
match, or auxiliary) can be obtained by calling pcre2_get_error_mes-
|
||||
sage(). The code is passed as the first argument, with the remaining
|
||||
two arguments specifying a code unit buffer and its length, into which
|
||||
the text message is placed. Note that the message is returned in code
|
||||
units of the appropriate width for the library that is being used.
|
||||
|
||||
The returned message is terminated with a trailing zero, and the func-
|
||||
tion returns the number of code units used, excluding the trailing
|
||||
zero. If the error number is unknown, the negative error code
|
||||
PCRE2_ERROR_BADDATA is returned. If the buffer is too small, the mes-
|
||||
sage is truncated (but still with a trailing zero), and the negative
|
||||
error code PCRE2_ERROR_NOMEMORY is returned. None of the messages are
|
||||
very long; a buffer size of 120 code units is ample.
|
||||
|
||||
|
||||
EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
|
||||
|
||||
int pcre2_substring_length_bynumber(pcre2_match_data *match_data,
|
||||
|
@ -2861,7 +2889,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS
|
|||
used in an assertion).
|
||||
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
obtained by calling pcre2_get_error_message().
|
||||
obtained by calling the pcre2_get_error_message() function (see
|
||||
"Obtaining a textual error message" above).
|
||||
|
||||
|
||||
DUPLICATE SUBPATTERN NAMES
|
||||
|
@ -3122,7 +3151,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2_GET_ERROR_MESSAGE 3 "21 October 2014" "PCRE2 10.00"
|
||||
.TH PCRE2_GET_ERROR_MESSAGE 3 "17 June 2016" "PCRE2 10.22"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.SH SYNOPSIS
|
||||
|
@ -23,7 +23,10 @@ errors are negative numbers. The arguments are:
|
|||
\fIbufflen\fP the length of the buffer (code units)
|
||||
.sp
|
||||
The function returns the length of the message, excluding the trailing zero, or
|
||||
a negative error code if the buffer is too small.
|
||||
the negative error code PCRE2_ERROR_NOMEMORY if the buffer is too small. In
|
||||
this case, the returned message is truncated (but still with a trailing zero).
|
||||
If \fIerrorcode\fP does not contain a recognized error code number, the
|
||||
negative value PCRE2_ERROR_BADDATA is returned.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
.\" HREF
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2API 3 "05 June 2016" "PCRE2 10.22"
|
||||
.TH PCRE2API 3 "17 June 2016" "PCRE2 10.22"
|
||||
.SH NAME
|
||||
PCRE2 - Perl-compatible regular expressions (revised API)
|
||||
.sp
|
||||
|
@ -1032,7 +1032,7 @@ The \fBpcre2_compile()\fP function compiles a pattern into an internal form.
|
|||
The pattern is defined by a pointer to a string of code units and a length. If
|
||||
the pattern is zero-terminated, the length can be specified as
|
||||
PCRE2_ZERO_TERMINATED. The function returns a pointer to a block of memory that
|
||||
contains the compiled pattern and related data.
|
||||
contains the compiled pattern and related data, or NULL if an error occurred.
|
||||
.P
|
||||
If the compile context argument \fIccontext\fP is NULL, memory for the compiled
|
||||
pattern is obtained by calling \fBmalloc()\fP. Otherwise, it is obtained from
|
||||
|
@ -1054,8 +1054,9 @@ to acquire a private copy of shared compiled code.
|
|||
.P
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that they can
|
||||
be referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern (or a subject string) until after all operations on the
|
||||
be referenced by the substring extraction functions. After running a match, you
|
||||
must not free a compiled pattern (or a subject string) until after all
|
||||
operations on the
|
||||
.\" HTML <a href="#matchdatablock">
|
||||
.\" </a>
|
||||
match data block
|
||||
|
@ -1086,13 +1087,22 @@ above).
|
|||
.\"
|
||||
.P
|
||||
If \fIerrorcode\fP or \fIerroroffset\fP is NULL, \fBpcre2_compile()\fP returns
|
||||
NULL immediately. Otherwise, if compilation of a pattern fails,
|
||||
\fBpcre2_compile()\fP returns NULL, having set these variables to an error code
|
||||
and an offset (number of code units) within the pattern, respectively. The
|
||||
\fBpcre2_get_error_message()\fP function provides a textual message for each
|
||||
error code. Compilation errors are positive numbers, but UTF formatting errors
|
||||
are negative numbers. For an invalid UTF-8 or UTF-16 string, the offset is that
|
||||
of the first code unit of the failing character.
|
||||
NULL immediately. Otherwise, the variables to which these point are set to an
|
||||
error code and an offset (number of code units) within the pattern,
|
||||
respectively, when \fBpcre2_compile()\fP returns NULL because a compilation
|
||||
error has occurred. The values are not defined when compilation is successful
|
||||
and \fBpcre2_compile()\fP returns a non-NULL value.
|
||||
.P
|
||||
The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual error
|
||||
message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
below)
|
||||
.\"
|
||||
provides a textual message for each error code. Compilation errors have
|
||||
positive error codes; UTF formatting error codes are negative. For an invalid
|
||||
UTF-8 or UTF-16 string, the offset is that of the first code unit of the
|
||||
failing character.
|
||||
.P
|
||||
Some errors are not detected until the whole pattern has been scanned; in these
|
||||
cases, the offset passed back is the length of the pattern. Note that the
|
||||
|
@ -1479,15 +1489,21 @@ page.
|
|||
.SH "COMPILATION ERROR CODES"
|
||||
.rs
|
||||
.sp
|
||||
There are over 80 positive error codes that \fBpcre2_compile()\fP may return if
|
||||
it finds an error in the pattern. There are also some negative error codes that
|
||||
are used for invalid UTF strings. These are the same as given by
|
||||
\fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described in the
|
||||
There are over 80 positive error codes that \fBpcre2_compile()\fP may return
|
||||
(via \fIerrorcode\fP) if it finds an error in the pattern. There are also some
|
||||
negative error codes that are used for invalid UTF strings. These are the same
|
||||
as given by \fBpcre2_match()\fP and \fBpcre2_dfa_match()\fP, and are described
|
||||
in the
|
||||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
page. The \fBpcre2_get_error_message()\fP function can be called to obtain a
|
||||
textual error message from any error code.
|
||||
page. The \fBpcre2_get_error_message()\fP function (see "Obtaining a textual
|
||||
error message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
below)
|
||||
.\"
|
||||
can be called to obtain a textual error message from any error code.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="jitcompiling"></a>
|
||||
|
@ -2454,11 +2470,16 @@ page.
|
|||
.rs
|
||||
.sp
|
||||
If \fBpcre2_match()\fP fails, it returns a negative number. This can be
|
||||
converted to a text string by calling \fBpcre2_get_error_message()\fP. Negative
|
||||
error codes are also returned by other functions, and are documented with them.
|
||||
The codes are given names in the header file. If UTF checking is in force and
|
||||
an invalid UTF subject string is detected, one of a number of UTF-specific
|
||||
negative error codes is returned. Details are given in the
|
||||
converted to a text string by calling the \fBpcre2_get_error_message()\fP
|
||||
function (see "Obtaining a textual error message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
below).
|
||||
.\"
|
||||
Negative error codes are also returned by other functions, and are documented
|
||||
with them. The codes are given names in the header file. If UTF checking is in
|
||||
force and an invalid UTF subject string is detected, one of a number of
|
||||
UTF-specific negative error codes is returned. Details are given in the
|
||||
.\" HREF
|
||||
\fBpcre2unicode\fP
|
||||
.\"
|
||||
|
@ -2571,6 +2592,30 @@ is attempted.
|
|||
The internal recursion limit was reached.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="geterrormessage"></a>
|
||||
.SH "OBTAINING A TEXTUAL ERROR MESSAGE"
|
||||
.rs
|
||||
.sp
|
||||
.nf
|
||||
.B int pcre2_get_error_message(int \fIerrorcode\fP, PCRE2_UCHAR *\fIbuffer\fP,
|
||||
.B " PCRE2_SIZE \fIbufflen\fP);"
|
||||
.fi
|
||||
.P
|
||||
A text message for an error code from any PCRE2 function (compile, match, or
|
||||
auxiliary) can be obtained by calling \fBpcre2_get_error_message()\fP. The code
|
||||
is passed as the first argument, with the remaining two arguments specifying a
|
||||
code unit buffer and its length, into which the text message is placed. Note
|
||||
that the message is returned in code units of the appropriate width for the
|
||||
library that is being used.
|
||||
.P
|
||||
The returned message is terminated with a trailing zero, and the function
|
||||
returns the number of code units used, excluding the trailing zero. If the
|
||||
error number is unknown, the negative error code PCRE2_ERROR_BADDATA is
|
||||
returned. If the buffer is too small, the message is truncated (but still with
|
||||
a trailing zero), and the negative error code PCRE2_ERROR_NOMEMORY is returned.
|
||||
None of the messages are very long; a buffer size of 120 code units is ample.
|
||||
.
|
||||
.
|
||||
.\" HTML <a name="extractbynumber"></a>
|
||||
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
|
||||
.rs
|
||||
|
@ -2948,7 +2993,12 @@ substitution), and PCRE2_BADSUBPATTERN (the pattern match ended before it
|
|||
started, which can happen if \eK is used in an assertion).
|
||||
.P
|
||||
As for all PCRE2 errors, a text message that describes the error can be
|
||||
obtained by calling \fBpcre2_get_error_message()\fP.
|
||||
obtained by calling the \fBpcre2_get_error_message()\fP function (see
|
||||
"Obtaining a textual error message"
|
||||
.\" HTML <a href="#geterrormessage">
|
||||
.\" </a>
|
||||
above).
|
||||
.\"
|
||||
.
|
||||
.
|
||||
.SH "DUPLICATE SUBPATTERN NAMES"
|
||||
|
@ -3242,6 +3292,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.TH PCRE2TEST 1 "05 June 2016" "PCRE 10.22"
|
||||
.TH PCRE2TEST 1 "17 June 2016" "PCRE 10.22"
|
||||
.SH NAME
|
||||
pcre2test - a program for testing Perl-compatible regular expressions.
|
||||
.SH SYNOPSIS
|
||||
|
@ -143,6 +143,12 @@ Behave as if each subject line has the \fBdfa\fP modifier; matching is done
|
|||
using the \fBpcre2_dfa_match()\fP function instead of the default
|
||||
\fBpcre2_match()\fP.
|
||||
.TP 10
|
||||
\fB-error\fP \fInumber[,number,...]\fP
|
||||
Call \fBpcre2_get_error_message()\fP for each of the error numbers in the
|
||||
comma-separated list, display the resulting messages on the standard output,
|
||||
then exit with zero exit code. The numbers may be positive or negative. This is
|
||||
a convenience facility for PCRE2 maintainers.
|
||||
.TP 10
|
||||
\fB-help\fP
|
||||
Output a brief summary of these options and then exit.
|
||||
.TP 10
|
||||
|
@ -1675,6 +1681,6 @@ Cambridge, England.
|
|||
.rs
|
||||
.sp
|
||||
.nf
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
.fi
|
||||
|
|
|
@ -138,6 +138,13 @@ COMMAND LINE OPTIONS
|
|||
is done using the pcre2_dfa_match() function instead of the
|
||||
default pcre2_match().
|
||||
|
||||
-error number[,number,...]
|
||||
Call pcre2_get_error_message() for each of the error numbers
|
||||
in the comma-separated list, display the resulting messages
|
||||
on the standard output, then exit with zero exit code. The
|
||||
numbers may be positive or negative. This is a convenience
|
||||
facility for PCRE2 maintainers.
|
||||
|
||||
-help Output a brief summary of these options and then exit.
|
||||
|
||||
-i Behave as if each pattern has the /info modifier; information
|
||||
|
@ -1539,5 +1546,5 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 05 June 2016
|
||||
Last updated: 17 June 2016
|
||||
Copyright (c) 1997-2016 University of Cambridge.
|
||||
|
|
|
@ -277,32 +277,32 @@ Returns: length of message if all is well
|
|||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, size_t size)
|
||||
{
|
||||
char xbuff[128];
|
||||
const unsigned char *message;
|
||||
size_t i;
|
||||
int n;
|
||||
|
||||
if (size == 0) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
if (enumber > COMPILE_ERROR_BASE) /* Compile error */
|
||||
if (enumber >= COMPILE_ERROR_BASE) /* Compile error */
|
||||
{
|
||||
message = compile_error_texts;
|
||||
n = enumber - COMPILE_ERROR_BASE;
|
||||
}
|
||||
else /* Match or UTF error */
|
||||
else if (enumber < 0) /* Match or UTF error */
|
||||
{
|
||||
message = match_error_texts;
|
||||
n = -enumber;
|
||||
}
|
||||
else /* Invalid error number */
|
||||
{
|
||||
message = (unsigned char *)"\0"; /* Empty message list */
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (; n > 0; n--)
|
||||
{
|
||||
while (*message++ != CHAR_NULL) {};
|
||||
if (*message == CHAR_NULL)
|
||||
{
|
||||
sprintf(xbuff, "No text for error %d", enumber);
|
||||
break;
|
||||
}
|
||||
if (*message == CHAR_NULL) return PCRE2_ERROR_BADDATA;
|
||||
}
|
||||
|
||||
for (i = 0; *message != 0; i++)
|
||||
|
|
|
@ -6885,6 +6885,7 @@ printf(" pcre2-32 32 bit library support enabled [0, 1]\n");
|
|||
printf(" unicode Unicode and UTF support enabled [0, 1]\n");
|
||||
printf(" -d set default pattern control 'debug'\n");
|
||||
printf(" -dfa set default subject control 'dfa'\n");
|
||||
printf(" -error <n,m,..> show messages for error numbers, then exit\n");
|
||||
printf(" -help show usage information\n");
|
||||
printf(" -i set default pattern control 'info'\n");
|
||||
printf(" -jit set default pattern control 'jit'\n");
|
||||
|
@ -7062,6 +7063,7 @@ BOOL showtotaltimes = FALSE;
|
|||
BOOL skipping = FALSE;
|
||||
char *arg_subject = NULL;
|
||||
char *arg_pattern = NULL;
|
||||
char *arg_error = NULL;
|
||||
|
||||
/* The offsets to the options and control bits fields of the pattern and data
|
||||
control blocks must be the same so that common options and controls such as
|
||||
|
@ -7273,6 +7275,12 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
|
|||
/* The following options save their data for processing once we know what
|
||||
the running mode is. */
|
||||
|
||||
else if (strcmp(arg, "-error") == 0)
|
||||
{
|
||||
arg_error = argv[op+1];
|
||||
goto CHECK_VALUE_EXISTS;
|
||||
}
|
||||
|
||||
else if (strcmp(arg, "-subject") == 0)
|
||||
{
|
||||
arg_subject = argv[op+1];
|
||||
|
@ -7306,6 +7314,88 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0)
|
|||
argc--;
|
||||
}
|
||||
|
||||
/* If -error was present, get the error numbers, show the messages, and exit.
|
||||
We wait to do this until we know which mode we are in. */
|
||||
|
||||
if (arg_error != NULL)
|
||||
{
|
||||
int len;
|
||||
int errcode;
|
||||
char *endptr;
|
||||
|
||||
/* Ensure the relevant non-8-bit buffer is available. */
|
||||
|
||||
#ifdef SUPPORT_PCRE2_16
|
||||
if (test_mode == PCRE16_MODE)
|
||||
{
|
||||
pbuffer16_size = 256;
|
||||
pbuffer16 = (uint16_t *)malloc(pbuffer16_size);
|
||||
if (pbuffer16 == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer16\n",
|
||||
(unsigned long int)pbuffer16_size);
|
||||
yield = 1;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_PCRE2_32
|
||||
if (test_mode == PCRE32_MODE)
|
||||
{
|
||||
pbuffer32_size = 256;
|
||||
pbuffer32 = (uint32_t *)malloc(pbuffer32_size);
|
||||
if (pbuffer32 == NULL)
|
||||
{
|
||||
fprintf(stderr, "pcre2test: malloc(%lu) failed for pbuffer32\n",
|
||||
(unsigned long int)pbuffer32_size);
|
||||
yield = 1;
|
||||
goto EXIT;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Loop along a list of error numbers. */
|
||||
|
||||
for (;;)
|
||||
{
|
||||
errcode = strtol(arg_error, &endptr, 10);
|
||||
if (*endptr != 0 && *endptr != CHAR_COMMA)
|
||||
{
|
||||
fprintf(stderr, "** '%s' is not a valid error number list\n", arg_error);
|
||||
yield = 1;
|
||||
goto EXIT;
|
||||
}
|
||||
printf("Error %d: ", errcode);
|
||||
PCRE2_GET_ERROR_MESSAGE(len, errcode, pbuffer);
|
||||
if (len < 0)
|
||||
{
|
||||
switch (len)
|
||||
{
|
||||
case PCRE2_ERROR_BADDATA:
|
||||
printf("PCRE2_ERROR_BADDATA (unknown error number)");
|
||||
break;
|
||||
|
||||
case PCRE2_ERROR_NOMEMORY:
|
||||
printf("PCRE2_ERROR_NOMEMORY (buffer too small)");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("Unexpected return (%d) from pcre2_get_error_message()", len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
PCHARSV(CASTVAR(void *, pbuffer), 0, len, FALSE, stdout);
|
||||
}
|
||||
printf("\n");
|
||||
if (*endptr == 0) goto EXIT;
|
||||
arg_error = endptr + 1;
|
||||
}
|
||||
/* Control never reaches here */
|
||||
} /* End of -error handling */
|
||||
|
||||
/* Initialize things that cannot be done until we know which test mode we are
|
||||
running in. When HEAP_MATCH_RECURSE is undefined, calling pcre2_set_recursion_
|
||||
memory_management() is a no-op, but we call it in order to exercise it. Also
|
||||
|
|
|
@ -15187,3 +15187,11 @@ Failed: error 122 at offset 10: unmatched closing parenthesis
|
|||
No match
|
||||
|
||||
# End of testinput2
|
||||
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
Error -2: partial match
|
||||
Error -1: no match
|
||||
Error 0: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error 100: no error
|
||||
Error 188: pattern string is longer than the limit set by the application
|
||||
Error 189: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
|
|
Loading…
Reference in New Issue