File tidies for 10.00-RC2.
This commit is contained in:
parent
e34c44e2aa
commit
2a5767d757
|
@ -1,7 +1,7 @@
|
||||||
Change Log for PCRE2
|
Change Log for PCRE2
|
||||||
--------------------
|
--------------------
|
||||||
|
|
||||||
Version 10.00 28-November-2014
|
Version 10.00 19-December-2014
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||||
|
@ -14,7 +14,8 @@ logged. In addition to the API changes, the following changes were made. They
|
||||||
are either new functionality, or bug fixes and other noticeable changes of
|
are either new functionality, or bug fixes and other noticeable changes of
|
||||||
behaviour that were implemented after the code had been forked.
|
behaviour that were implemented after the code had been forked.
|
||||||
|
|
||||||
1. Unicode support is now enabled by default.
|
1. Unicode support is now enabled by default, but it can optionally be
|
||||||
|
disabled.
|
||||||
|
|
||||||
2. The test program, now called pcre2test, was re-specified and almost
|
2. The test program, now called pcre2test, was re-specified and almost
|
||||||
completely re-written. Its input is not compatible with input for pcretest.
|
completely re-written. Its input is not compatible with input for pcretest.
|
||||||
|
|
2
NEWS
2
NEWS
|
@ -1,7 +1,7 @@
|
||||||
News about PCRE2 releases
|
News about PCRE2 releases
|
||||||
-------------------------
|
-------------------------
|
||||||
|
|
||||||
Version 10.00 28-November-2014
|
Version 10.00 19-December-2014
|
||||||
------------------------------
|
------------------------------
|
||||||
|
|
||||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||||
|
|
|
@ -11,7 +11,7 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
||||||
m4_define(pcre2_major, [10])
|
m4_define(pcre2_major, [10])
|
||||||
m4_define(pcre2_minor, [00])
|
m4_define(pcre2_minor, [00])
|
||||||
m4_define(pcre2_prerelease, [-RC2])
|
m4_define(pcre2_prerelease, [-RC2])
|
||||||
m4_define(pcre2_date, [2014-11-28])
|
m4_define(pcre2_date, [2014-12-19])
|
||||||
|
|
||||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||||
# 50 lines of this file. Please update that if the variables above are moved.
|
# 50 lines of this file. Please update that if the variables above are moved.
|
||||||
|
|
|
@ -36,8 +36,16 @@ by name, into a given buffer. The arguments are:
|
||||||
</pre>
|
</pre>
|
||||||
The <i>bufflen</i> variable is updated to contain the length of the extracted
|
The <i>bufflen</i> variable is updated to contain the length of the extracted
|
||||||
string, excluding the trailing zero. The yield of the function is zero for
|
string, excluding the trailing zero. The yield of the function is zero for
|
||||||
success, PCRE2_ERROR_NOMEMORY if the buffer is too small, or
|
success or one of the following error numbers:
|
||||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
<pre>
|
||||||
|
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||||
|
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||||
|
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||||
|
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||||
|
</pre>
|
||||||
|
If there is more than one group with the given name, the first one that is set
|
||||||
|
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||||
|
given name was set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -36,9 +36,15 @@ buffer. The arguments are:
|
||||||
<i>bufflen</i> Length of buffer
|
<i>bufflen</i> Length of buffer
|
||||||
</pre>
|
</pre>
|
||||||
The <i>bufflen</i> variable is updated with the length of the extracted string,
|
The <i>bufflen</i> variable is updated with the length of the extracted string,
|
||||||
excluding the terminating zero. The yield of the function is zero for success,
|
excluding the terminating zero. The yield of the function is zero for success
|
||||||
PCRE2_ERROR_NOMEMORY if the buffer was too small, or PCRE2_ERROR_NOSUBSTRING if
|
or one of the following error numbers:
|
||||||
the string number is invalid.
|
<pre>
|
||||||
|
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||||
|
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||||
|
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||||
|
PCRE2_ERROR_NOMEMORY the buffer is too small
|
||||||
|
|
||||||
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -37,9 +37,17 @@ newly acquired memory. The arguments are:
|
||||||
The memory in which the substring is placed is obtained by calling the same
|
The memory in which the substring is placed is obtained by calling the same
|
||||||
memory allocation function that was used for the match data block. The
|
memory allocation function that was used for the match data block. The
|
||||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||||
it is no longer needed. The yield of the function is zero for success,
|
it is no longer needed. The yield of the function is zero for success or one of
|
||||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
the following error numbers:
|
||||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
<pre>
|
||||||
|
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||||
|
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||||
|
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||||
|
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||||
|
</pre>
|
||||||
|
If there is more than one group with the given name, the first one that is set
|
||||||
|
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||||
|
given name was set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -37,9 +37,15 @@ into newly acquired memory. The arguments are:
|
||||||
The memory in which the substring is placed is obtained by calling the same
|
The memory in which the substring is placed is obtained by calling the same
|
||||||
memory allocation function that was used for the match data block. The
|
memory allocation function that was used for the match data block. The
|
||||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||||
it is no longer needed. The yield of the function is zero for success,
|
it is no longer needed. The yield of the function is zero for success or one of
|
||||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
the following error numbers:
|
||||||
PCRE2_ERROR_NOSUBSTRING if the string number is invalid.
|
<pre>
|
||||||
|
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||||
|
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||||
|
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||||
|
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||||
|
|
||||||
|
</PRE>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
There is a complete description of the PCRE2 native API in the
|
There is a complete description of the PCRE2 native API in the
|
||||||
|
|
|
@ -947,6 +947,14 @@ contains the compiled pattern and related data. The caller must free the memory
|
||||||
by calling <b>pcre2_code_free()</b> when it is no longer needed.
|
by calling <b>pcre2_code_free()</b> when it is no longer needed.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||||
|
pattern and the subject string are set in the match data block so that they can
|
||||||
|
be referenced by the extraction functions. After running a match, you must not
|
||||||
|
free a compiled pattern (or a subject string) until after all operations on the
|
||||||
|
<a href="#matchdatablock">match data block</a>
|
||||||
|
have taken place.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
||||||
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
|
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
|
||||||
the same memory function that was used for the compile context.
|
the same memory function that was used for the compile context.
|
||||||
|
@ -1690,7 +1698,7 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
||||||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Information about successful and unsuccessful matches is placed in a match
|
Information about a successful or unsuccessful match is placed in a match
|
||||||
data block, which is an opaque structure that is accessed by function calls. In
|
data block, which is an opaque structure that is accessed by function calls. In
|
||||||
particular, the match data block contains a vector of offsets into the subject
|
particular, the match data block contains a vector of offsets into the subject
|
||||||
string that define the matched part of the subject and any substrings that were
|
string that define the matched part of the subject and any substrings that were
|
||||||
|
@ -1724,15 +1732,24 @@ pattern (custom or default).
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
A match data block can be used many times, with the same or different compiled
|
A match data block can be used many times, with the same or different compiled
|
||||||
patterns. When it is no longer needed, it should be freed by calling
|
patterns. You can extract information from a match data block after a match
|
||||||
<b>pcre2_match_data_free()</b>. You can extract information from a match data
|
operation has finished, using functions that are described in the sections on
|
||||||
block after a match operation has finished, using functions that are described
|
|
||||||
in the sections on
|
|
||||||
<a href="#matchedstrings">matched strings</a>
|
<a href="#matchedstrings">matched strings</a>
|
||||||
and
|
and
|
||||||
<a href="#matchotherdata">other match data</a>
|
<a href="#matchotherdata">other match data</a>
|
||||||
below.
|
below.
|
||||||
</P>
|
</P>
|
||||||
|
<P>
|
||||||
|
When one of the matching functions is called, pointers to the compiled pattern
|
||||||
|
and the subject string are set in the match data block so that they can be
|
||||||
|
referenced by the extraction functions. After running a match, you must not
|
||||||
|
free a compiled pattern or a subject string until after all operations on the
|
||||||
|
match data block (for that match) have taken place.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
When a match data block itself is no longer needed, it should be freed by
|
||||||
|
calling <b>pcre2_match_data_free()</b>.
|
||||||
|
</P>
|
||||||
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||||
<P>
|
<P>
|
||||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||||
|
@ -2034,8 +2051,14 @@ from a successful match is 1, indicating that just the first pair of offsets
|
||||||
has been set.
|
has been set.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If a capturing subpattern is matched repeatedly within a single match
|
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||||
operation, it is the last portion of the string that it matched that is
|
reported start of the match can be greater than the end of the match. For
|
||||||
|
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||||
|
offset values for the match are 2 and 0.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If a capturing subpattern group is matched repeatedly within a single match
|
||||||
|
operation, it is the last portion of the subject that it matched that is
|
||||||
returned.
|
returned.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2234,25 +2257,34 @@ Captured substrings can be accessed directly by using the ovector as described
|
||||||
<a href="#matchedstrings">above.</a>
|
<a href="#matchedstrings">above.</a>
|
||||||
For convenience, auxiliary functions are provided for extracting captured
|
For convenience, auxiliary functions are provided for extracting captured
|
||||||
substrings as new, separate, zero-terminated strings. The functions in this
|
substrings as new, separate, zero-terminated strings. The functions in this
|
||||||
section identify substrings by number. The next section describes similar
|
section identify substrings by number. The number zero refers to the entire
|
||||||
functions for extracting substrings by name. A substring that contains a binary
|
matched substring, with higher numbers referring to substrings captured by
|
||||||
zero is correctly extracted and has a further zero added on the end, but the
|
parenthesized groups. The next section describes similar functions for
|
||||||
result is not, of course, a C string.
|
extracting captured substrings by name. A substring that contains a binary zero
|
||||||
|
is correctly extracted and has a further zero added on the end, but the result
|
||||||
|
is not, of course, a C string.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||||
|
reported start of the match can be greater than the end of the match. For
|
||||||
|
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||||
|
offset values for the match are 2 and 0. In this situation, calling these
|
||||||
|
functions with a zero substring number extracts a zero-length empty string.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
You can find the length in code units of a captured substring without
|
You can find the length in code units of a captured substring without
|
||||||
extracting it by calling <b>pcre2_substring_length_bynumber()</b>. The first
|
extracting it by calling <b>pcre2_substring_length_bynumber()</b>. The first
|
||||||
argument is a pointer to the match data block, the second is the group number,
|
argument is a pointer to the match data block, the second is the group number,
|
||||||
and the third is a pointer to a variable into which the length is placed.
|
and the third is a pointer to a variable into which the length is placed. If
|
||||||
|
you just want to know whether or not the substring has been captured, you can
|
||||||
|
pass the third argument as NULL.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
The <b>pcre2_substring_copy_bynumber()</b> function copies a captured substring
|
||||||
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
into a supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it
|
||||||
new memory, obtained using the same memory allocation function that was used
|
into new memory, obtained using the same memory allocation function that was
|
||||||
for the match data block. The first two arguments of these functions are a
|
used for the match data block. The first two arguments of these functions are a
|
||||||
pointer to the match data block and a capturing group number. A group number of
|
pointer to the match data block and a capturing group number.
|
||||||
zero extracts the substring that matched the entire pattern, and higher values
|
|
||||||
extract the captured substrings.
|
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
|
The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
|
||||||
|
@ -2268,8 +2300,9 @@ zero. When the substring is no longer needed, the memory should be freed by
|
||||||
calling <b>pcre2_substring_free()</b>.
|
calling <b>pcre2_substring_free()</b>.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
The return value from these functions is zero for success, or one of these
|
The return value from all these functions is zero for success, or a negative
|
||||||
error codes:
|
error code. If the pattern match failed, the match failure code is returned.
|
||||||
|
Other possible error codes are:
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NOMEMORY
|
PCRE2_ERROR_NOMEMORY
|
||||||
</pre>
|
</pre>
|
||||||
|
@ -2278,10 +2311,20 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
|
||||||
<pre>
|
<pre>
|
||||||
PCRE2_ERROR_NOSUBSTRING
|
PCRE2_ERROR_NOSUBSTRING
|
||||||
</pre>
|
</pre>
|
||||||
No substring with the given number was captured. This could be because there is
|
There is no substring with that number in the pattern, that is, the number is
|
||||||
no capturing group of that number in the pattern, or because the group with
|
greater than the number of capturing parentheses.
|
||||||
that number did not participate in the match, or because the ovector was too
|
<pre>
|
||||||
small to capture that group.
|
PCRE2_ERROR_UNAVAILABLE
|
||||||
|
</pre>
|
||||||
|
The substring number, though not greater than the number of captures in the
|
||||||
|
pattern, is greater than the number of slots in the ovector, so the substring
|
||||||
|
could not be captured.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ERROR_UNSET
|
||||||
|
</pre>
|
||||||
|
The substring did not participate in the match. For example, if the pattern is
|
||||||
|
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||||
|
capturing slots, substring number 1 is unset.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC29" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
<br><a name="SEC29" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2316,7 +2359,7 @@ capturing subpattern number <i>n+1</i> matches some part of the subject, but
|
||||||
subpattern <i>n</i> has not been used at all, it returns an empty string. This
|
subpattern <i>n</i> has not been used at all, it returns an empty string. This
|
||||||
can be distinguished from a genuine zero-length substring by inspecting the
|
can be distinguished from a genuine zero-length substring by inspecting the
|
||||||
appropriate offset in the ovector, which contain PCRE2_UNSET for unset
|
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||||
substrings.
|
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||||
<a name="extractbyname"></a></P>
|
<a name="extractbyname"></a></P>
|
||||||
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||||
<P>
|
<P>
|
||||||
|
@ -2350,14 +2393,22 @@ calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
||||||
compiled pattern, and the second is the name. The yield of the function is the
|
compiled pattern, and the second is the name. The yield of the function is the
|
||||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||||
that name.
|
that name. Given the number, you can extract the substring directly, or use one
|
||||||
|
of the functions described above.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
Given the number, you can extract the substring directly, or use one of the
|
For convenience, there are also "byname" functions that correspond to the
|
||||||
functions described above. For convenience, there are also "byname" functions
|
"bynumber" functions, the only difference being that the second argument is a
|
||||||
that correspond to the "bynumber" functions, the only difference being that the
|
name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
|
||||||
second argument is a name instead of a number. However, if PCRE2_DUPNAMES is
|
names, these functions scan all the groups with the given name, and return the
|
||||||
set and there are duplicate names, the behaviour may not be what you want.
|
first named string that is set.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||||
|
returned. If all groups with the name have numbers that are greater than the
|
||||||
|
number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there
|
||||||
|
is at least one group with a slot in the ovector, but no group is found to be
|
||||||
|
set, PCRE2_ERROR_UNSET is returned.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
|
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
|
||||||
|
@ -2451,9 +2502,9 @@ documentation.
|
||||||
<P>
|
<P>
|
||||||
When duplicates are present, <b>pcre2_substring_copy_byname()</b> and
|
When duplicates are present, <b>pcre2_substring_copy_byname()</b> and
|
||||||
<b>pcre2_substring_get_byname()</b> return the first substring corresponding to
|
<b>pcre2_substring_get_byname()</b> return the first substring corresponding to
|
||||||
the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING is
|
the given name that is set. Only if none are set is PCRE2_ERROR_UNSET
|
||||||
returned. The <b>pcre2_substring_number_from_name()</b> function returns
|
returned. The <b>pcre2_substring_number_from_name()</b> function returns the
|
||||||
the error PCRE2_ERROR_NOUNIQUESUBSTRING.
|
error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
If you want to get full details of all captured substrings for a given name,
|
If you want to get full details of all captured substrings for a given name,
|
||||||
|
@ -2607,17 +2658,38 @@ is matched against the string
|
||||||
</pre>
|
</pre>
|
||||||
the three matched strings are
|
the three matched strings are
|
||||||
<pre>
|
<pre>
|
||||||
<something>
|
|
||||||
<something> <something else>
|
|
||||||
<something> <something else> <something further>
|
<something> <something else> <something further>
|
||||||
|
<something> <something else>
|
||||||
|
<something>
|
||||||
</pre>
|
</pre>
|
||||||
On success, the yield of the function is a number greater than zero, which is
|
On success, the yield of the function is a number greater than zero, which is
|
||||||
the number of matched substrings. The offsets of the substrings are returned in
|
the number of matched substrings. The offsets of the substrings are returned in
|
||||||
the ovector, and can be extracted in the same way as for <b>pcre2_match()</b>.
|
the ovector, and can be extracted by number in the same way as for
|
||||||
They are returned in reverse order of length; that is, the longest
|
<b>pcre2_match()</b>, but the numbers bear no relation to any capturing groups
|
||||||
matching string is given first. If there were too many matches to fit into
|
that may exist in the pattern, because DFA matching does not support group
|
||||||
the ovector, the yield of the function is zero, and the vector is filled with
|
capture.
|
||||||
the longest matches.
|
</P>
|
||||||
|
<P>
|
||||||
|
Calls to the convenience functions that extract substrings by name
|
||||||
|
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
|
||||||
|
DFA match. The convenience functions that extract substrings by number never
|
||||||
|
return PCRE2_ERROR_NOSUBSTRING, and the meanings of some other errors are
|
||||||
|
slightly different:
|
||||||
|
<pre>
|
||||||
|
PCRE2_ERROR_UNAVAILABLE
|
||||||
|
</pre>
|
||||||
|
The ovector is not big enough to include a slot for the given substring number.
|
||||||
|
<pre>
|
||||||
|
PCRE2_ERROR_UNSET
|
||||||
|
</pre>
|
||||||
|
There is a slot in the ovector for this substring, but there were insufficient
|
||||||
|
matches to fill it.
|
||||||
|
</P>
|
||||||
|
<P>
|
||||||
|
The matched strings are stored in the ovector in reverse order of length; that
|
||||||
|
is, the longest matching string is first. If there were too many matches to fit
|
||||||
|
into the ovector, the yield of the function is zero, and the vector is filled
|
||||||
|
with the longest matches.
|
||||||
</P>
|
</P>
|
||||||
<P>
|
<P>
|
||||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to character
|
NOTE: PCRE2's "auto-possessification" optimization usually applies to character
|
||||||
|
@ -2685,7 +2757,7 @@ Cambridge, England.
|
||||||
</P>
|
</P>
|
||||||
<br><a name="SEC37" href="#TOC1">REVISION</a><br>
|
<br><a name="SEC37" href="#TOC1">REVISION</a><br>
|
||||||
<P>
|
<P>
|
||||||
Last updated: 01 December 2014
|
Last updated: 14 December 2014
|
||||||
<br>
|
<br>
|
||||||
Copyright © 1997-2014 University of Cambridge.
|
Copyright © 1997-2014 University of Cambridge.
|
||||||
<br>
|
<br>
|
||||||
|
|
958
doc/pcre2.txt
958
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -201,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_NAME "PCRE2"
|
#define PACKAGE_NAME "PCRE2"
|
||||||
|
|
||||||
/* Define to the full name and version of this package. */
|
/* Define to the full name and version of this package. */
|
||||||
#define PACKAGE_STRING "PCRE2 10.00-RC1"
|
#define PACKAGE_STRING "PCRE2 10.00-RC2"
|
||||||
|
|
||||||
/* Define to the one symbol short name of this package. */
|
/* Define to the one symbol short name of this package. */
|
||||||
#define PACKAGE_TARNAME "pcre2"
|
#define PACKAGE_TARNAME "pcre2"
|
||||||
|
@ -210,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
#define PACKAGE_URL ""
|
#define PACKAGE_URL ""
|
||||||
|
|
||||||
/* Define to the version of this package. */
|
/* Define to the version of this package. */
|
||||||
#define PACKAGE_VERSION "10.00-RC1"
|
#define PACKAGE_VERSION "10.00-RC2"
|
||||||
|
|
||||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||||
|
@ -288,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
||||||
/* #undef SUPPORT_VALGRIND */
|
/* #undef SUPPORT_VALGRIND */
|
||||||
|
|
||||||
/* Version number of package */
|
/* Version number of package */
|
||||||
#define VERSION "10.00-RC1"
|
#define VERSION "10.00-RC2"
|
||||||
|
|
||||||
/* Define to empty if `const' does not conform to ANSI C. */
|
/* Define to empty if `const' does not conform to ANSI C. */
|
||||||
/* #undef const */
|
/* #undef const */
|
||||||
|
|
|
@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define PCRE2_MAJOR 10
|
#define PCRE2_MAJOR 10
|
||||||
#define PCRE2_MINOR 00
|
#define PCRE2_MINOR 00
|
||||||
#define PCRE2_PRERELEASE -RC1
|
#define PCRE2_PRERELEASE -RC2
|
||||||
#define PCRE2_DATE 2014-11-28
|
#define PCRE2_DATE 2014-12-19
|
||||||
|
|
||||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||||
imported have to be identified as such. When building PCRE2, the appropriate
|
imported have to be identified as such. When building PCRE2, the appropriate
|
||||||
|
@ -80,20 +80,20 @@ uint8_t, UCHAR_MAX, etc are defined. */
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* The following options can be passed to pcre2_compile(), pcre2_match(), or
|
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||||
pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it is
|
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||||
passed. Put these bits at the most significant end of the options word so
|
is passed. Put these bits at the most significant end of the options word so
|
||||||
others can be added next to them */
|
others can be added next to them */
|
||||||
|
|
||||||
#define PCRE2_ANCHORED 0x80000000u
|
#define PCRE2_ANCHORED 0x80000000u
|
||||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||||
|
|
||||||
/* Other options that can be passed to pcre2_compile(). They may affect
|
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||||
compilation, JIT compilation, and/or interpretive execution. The following tags
|
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||||
indicate which:
|
The following tags indicate which:
|
||||||
|
|
||||||
C alters what is compiled
|
C alters what is compiled by pcre2_compile()
|
||||||
J alters what JIT compiles
|
J alters what is compiled by pcre2_jit_compile()
|
||||||
M is inspected during pcre2_match() execution
|
M is inspected during pcre2_match() execution
|
||||||
D is inspected during pcre2_dfa_match() execution
|
D is inspected during pcre2_dfa_match() execution
|
||||||
*/
|
*/
|
||||||
|
@ -212,19 +212,21 @@ context functions. */
|
||||||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||||
#define PCRE2_ERROR_DFA_WSSIZE (-42)
|
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||||
#define PCRE2_ERROR_INTERNAL (-43)
|
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||||
#define PCRE2_ERROR_JIT_BADOPTION (-44)
|
#define PCRE2_ERROR_INTERNAL (-44)
|
||||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
|
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||||
#define PCRE2_ERROR_MATCHLIMIT (-46)
|
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||||
#define PCRE2_ERROR_NOMEMORY (-47)
|
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||||
#define PCRE2_ERROR_NOSUBSTRING (-48)
|
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
|
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||||
#define PCRE2_ERROR_NULL (-50)
|
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
#define PCRE2_ERROR_NULL (-51)
|
||||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||||
#define PCRE2_ERROR_UNSET (-53)
|
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||||
|
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||||
|
#define PCRE2_ERROR_UNSET (-55)
|
||||||
|
|
||||||
/* Request types for pcre2_pattern_info() */
|
/* Request types for pcre2_pattern_info() */
|
||||||
|
|
||||||
|
@ -434,16 +436,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||||
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||||
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||||
unsigned int, PCRE2_SIZE *); \
|
uint32_t, PCRE2_SIZE *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||||
|
|
Loading…
Reference in New Issue