File tidies for 10.00-RC2.
This commit is contained in:
parent
e34c44e2aa
commit
2a5767d757
|
@ -1,7 +1,7 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.00 28-November-2014
|
||||
Version 10.00 19-December-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
|
@ -14,7 +14,8 @@ logged. In addition to the API changes, the following changes were made. They
|
|||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
behaviour that were implemented after the code had been forked.
|
||||
|
||||
1. Unicode support is now enabled by default.
|
||||
1. Unicode support is now enabled by default, but it can optionally be
|
||||
disabled.
|
||||
|
||||
2. The test program, now called pcre2test, was re-specified and almost
|
||||
completely re-written. Its input is not compatible with input for pcretest.
|
||||
|
|
2
NEWS
2
NEWS
|
@ -1,7 +1,7 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.00 28-November-2014
|
||||
Version 10.00 19-December-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
|
|
|
@ -11,7 +11,7 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [00])
|
||||
m4_define(pcre2_prerelease, [-RC2])
|
||||
m4_define(pcre2_date, [2014-11-28])
|
||||
m4_define(pcre2_date, [2014-12-19])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -36,8 +36,16 @@ by name, into a given buffer. The arguments are:
|
|||
</pre>
|
||||
The <i>bufflen</i> variable is updated to contain the length of the extracted
|
||||
string, excluding the trailing zero. The yield of the function is zero for
|
||||
success, PCRE2_ERROR_NOMEMORY if the buffer is too small, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
success or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -36,9 +36,15 @@ buffer. The arguments are:
|
|||
<i>bufflen</i> Length of buffer
|
||||
</pre>
|
||||
The <i>bufflen</i> variable is updated with the length of the extracted string,
|
||||
excluding the terminating zero. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if the buffer was too small, or PCRE2_ERROR_NOSUBSTRING if
|
||||
the string number is invalid.
|
||||
excluding the terminating zero. The yield of the function is zero for success
|
||||
or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is too small
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -37,9 +37,17 @@ newly acquired memory. The arguments are:
|
|||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -37,9 +37,15 @@ into newly acquired memory. The arguments are:
|
|||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -947,6 +947,14 @@ contains the compiled pattern and related data. The caller must free the memory
|
|||
by calling <b>pcre2_code_free()</b> when it is no longer needed.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that they can
|
||||
be referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern (or a subject string) until after all operations on the
|
||||
<a href="#matchdatablock">match data block</a>
|
||||
have taken place.
|
||||
</P>
|
||||
<P>
|
||||
If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
||||
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
|
||||
the same memory function that was used for the compile context.
|
||||
|
@ -1690,7 +1698,7 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
|||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
Information about a successful or unsuccessful match is placed in a match
|
||||
data block, which is an opaque structure that is accessed by function calls. In
|
||||
particular, the match data block contains a vector of offsets into the subject
|
||||
string that define the matched part of the subject and any substrings that were
|
||||
|
@ -1724,15 +1732,24 @@ pattern (custom or default).
|
|||
</P>
|
||||
<P>
|
||||
A match data block can be used many times, with the same or different compiled
|
||||
patterns. When it is no longer needed, it should be freed by calling
|
||||
<b>pcre2_match_data_free()</b>. You can extract information from a match data
|
||||
block after a match operation has finished, using functions that are described
|
||||
in the sections on
|
||||
patterns. You can extract information from a match data block after a match
|
||||
operation has finished, using functions that are described in the sections on
|
||||
<a href="#matchedstrings">matched strings</a>
|
||||
and
|
||||
<a href="#matchotherdata">other match data</a>
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
When one of the matching functions is called, pointers to the compiled pattern
|
||||
and the subject string are set in the match data block so that they can be
|
||||
referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern or a subject string until after all operations on the
|
||||
match data block (for that match) have taken place.
|
||||
</P>
|
||||
<P>
|
||||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling <b>pcre2_match_data_free()</b>.
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
|
@ -2034,8 +2051,14 @@ from a successful match is 1, indicating that just the first pair of offsets
|
|||
has been set.
|
||||
</P>
|
||||
<P>
|
||||
If a capturing subpattern is matched repeatedly within a single match
|
||||
operation, it is the last portion of the string that it matched that is
|
||||
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0.
|
||||
</P>
|
||||
<P>
|
||||
If a capturing subpattern group is matched repeatedly within a single match
|
||||
operation, it is the last portion of the subject that it matched that is
|
||||
returned.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2234,25 +2257,34 @@ Captured substrings can be accessed directly by using the ovector as described
|
|||
<a href="#matchedstrings">above.</a>
|
||||
For convenience, auxiliary functions are provided for extracting captured
|
||||
substrings as new, separate, zero-terminated strings. The functions in this
|
||||
section identify substrings by number. The next section describes similar
|
||||
functions for extracting substrings by name. A substring that contains a binary
|
||||
zero is correctly extracted and has a further zero added on the end, but the
|
||||
result is not, of course, a C string.
|
||||
section identify substrings by number. The number zero refers to the entire
|
||||
matched substring, with higher numbers referring to substrings captured by
|
||||
parenthesized groups. The next section describes similar functions for
|
||||
extracting captured substrings by name. A substring that contains a binary zero
|
||||
is correctly extracted and has a further zero added on the end, but the result
|
||||
is not, of course, a C string.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0. In this situation, calling these
|
||||
functions with a zero substring number extracts a zero-length empty string.
|
||||
</P>
|
||||
<P>
|
||||
You can find the length in code units of a captured substring without
|
||||
extracting it by calling <b>pcre2_substring_length_bynumber()</b>. The first
|
||||
argument is a pointer to the match data block, the second is the group number,
|
||||
and the third is a pointer to a variable into which the length is placed.
|
||||
and the third is a pointer to a variable into which the length is placed. If
|
||||
you just want to know whether or not the substring has been captured, you can
|
||||
pass the third argument as NULL.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
||||
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
||||
new memory, obtained using the same memory allocation function that was used
|
||||
for the match data block. The first two arguments of these functions are a
|
||||
pointer to the match data block and a capturing group number. A group number of
|
||||
zero extracts the substring that matched the entire pattern, and higher values
|
||||
extract the captured substrings.
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies a captured substring
|
||||
into a supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it
|
||||
into new memory, obtained using the same memory allocation function that was
|
||||
used for the match data block. The first two arguments of these functions are a
|
||||
pointer to the match data block and a capturing group number.
|
||||
</P>
|
||||
<P>
|
||||
The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
|
||||
|
@ -2268,8 +2300,9 @@ zero. When the substring is no longer needed, the memory should be freed by
|
|||
calling <b>pcre2_substring_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The return value from these functions is zero for success, or one of these
|
||||
error codes:
|
||||
The return value from all these functions is zero for success, or a negative
|
||||
error code. If the pattern match failed, the match failure code is returned.
|
||||
Other possible error codes are:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOMEMORY
|
||||
</pre>
|
||||
|
@ -2278,10 +2311,20 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
|
|||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
</pre>
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
small to capture that group.
|
||||
There is no substring with that number in the pattern, that is, the number is
|
||||
greater than the number of capturing parentheses.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
</pre>
|
||||
The substring number, though not greater than the number of captures in the
|
||||
pattern, is greater than the number of slots in the ovector, so the substring
|
||||
could not be captured.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNSET
|
||||
</pre>
|
||||
The substring did not participate in the match. For example, if the pattern is
|
||||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||
capturing slots, substring number 1 is unset.
|
||||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
|
@ -2316,7 +2359,7 @@ capturing subpattern number <i>n+1</i> matches some part of the subject, but
|
|||
subpattern <i>n</i> has not been used at all, it returns an empty string. This
|
||||
can be distinguished from a genuine zero-length substring by inspecting the
|
||||
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||
substrings.
|
||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||
<a name="extractbyname"></a></P>
|
||||
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
|
@ -2350,14 +2393,22 @@ calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
|||
compiled pattern, and the second is the name. The yield of the function is the
|
||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||
that name.
|
||||
that name. Given the number, you can extract the substring directly, or use one
|
||||
of the functions described above.
|
||||
</P>
|
||||
<P>
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
functions described above. For convenience, there are also "byname" functions
|
||||
that correspond to the "bynumber" functions, the only difference being that the
|
||||
second argument is a name instead of a number. However, if PCRE2_DUPNAMES is
|
||||
set and there are duplicate names, the behaviour may not be what you want.
|
||||
For convenience, there are also "byname" functions that correspond to the
|
||||
"bynumber" functions, the only difference being that the second argument is a
|
||||
name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
|
||||
names, these functions scan all the groups with the given name, and return the
|
||||
first named string that is set.
|
||||
</P>
|
||||
<P>
|
||||
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. If all groups with the name have numbers that are greater than the
|
||||
number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there
|
||||
is at least one group with a slot in the ovector, but no group is found to be
|
||||
set, PCRE2_ERROR_UNSET is returned.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
|
||||
|
@ -2451,9 +2502,9 @@ documentation.
|
|||
<P>
|
||||
When duplicates are present, <b>pcre2_substring_copy_byname()</b> and
|
||||
<b>pcre2_substring_get_byname()</b> return the first substring corresponding to
|
||||
the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. The <b>pcre2_substring_number_from_name()</b> function returns
|
||||
the error PCRE2_ERROR_NOUNIQUESUBSTRING.
|
||||
the given name that is set. Only if none are set is PCRE2_ERROR_UNSET
|
||||
returned. The <b>pcre2_substring_number_from_name()</b> function returns the
|
||||
error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names.
|
||||
</P>
|
||||
<P>
|
||||
If you want to get full details of all captured substrings for a given name,
|
||||
|
@ -2607,17 +2658,38 @@ is matched against the string
|
|||
</pre>
|
||||
the three matched strings are
|
||||
<pre>
|
||||
<something>
|
||||
<something> <something else>
|
||||
<something> <something else> <something further>
|
||||
<something> <something else>
|
||||
<something>
|
||||
</pre>
|
||||
On success, the yield of the function is a number greater than zero, which is
|
||||
the number of matched substrings. The offsets of the substrings are returned in
|
||||
the ovector, and can be extracted in the same way as for <b>pcre2_match()</b>.
|
||||
They are returned in reverse order of length; that is, the longest
|
||||
matching string is given first. If there were too many matches to fit into
|
||||
the ovector, the yield of the function is zero, and the vector is filled with
|
||||
the longest matches.
|
||||
the ovector, and can be extracted by number in the same way as for
|
||||
<b>pcre2_match()</b>, but the numbers bear no relation to any capturing groups
|
||||
that may exist in the pattern, because DFA matching does not support group
|
||||
capture.
|
||||
</P>
|
||||
<P>
|
||||
Calls to the convenience functions that extract substrings by name
|
||||
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
|
||||
DFA match. The convenience functions that extract substrings by number never
|
||||
return PCRE2_ERROR_NOSUBSTRING, and the meanings of some other errors are
|
||||
slightly different:
|
||||
<pre>
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
</pre>
|
||||
The ovector is not big enough to include a slot for the given substring number.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNSET
|
||||
</pre>
|
||||
There is a slot in the ovector for this substring, but there were insufficient
|
||||
matches to fill it.
|
||||
</P>
|
||||
<P>
|
||||
The matched strings are stored in the ovector in reverse order of length; that
|
||||
is, the longest matching string is first. If there were too many matches to fit
|
||||
into the ovector, the yield of the function is zero, and the vector is filled
|
||||
with the longest matches.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to character
|
||||
|
@ -2685,7 +2757,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC37" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 December 2014
|
||||
Last updated: 14 December 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
162
doc/pcre2.txt
162
doc/pcre2.txt
|
@ -995,6 +995,13 @@ COMPILING A PATTERN
|
|||
must free the memory by calling pcre2_code_free() when it is no longer
|
||||
needed.
|
||||
|
||||
NOTE: When one of the matching functions is called, pointers to the
|
||||
compiled pattern and the subject string are set in the match data block
|
||||
so that they can be referenced by the extraction functions. After run-
|
||||
ning a match, you must not free a compiled pattern (or a subject
|
||||
string) until after all operations on the match data block have taken
|
||||
place.
|
||||
|
||||
If the compile context argument ccontext is NULL, memory for the com-
|
||||
piled pattern is obtained by calling malloc(). Otherwise, it is
|
||||
obtained from the same memory function that was used for the compile
|
||||
|
@ -1710,7 +1717,7 @@ THE MATCH DATA BLOCK
|
|||
|
||||
void pcre2_match_data_free(pcre2_match_data *match_data);
|
||||
|
||||
Information about successful and unsuccessful matches is placed in a
|
||||
Information about a successful or unsuccessful match is placed in a
|
||||
match data block, which is an opaque structure that is accessed by
|
||||
function calls. In particular, the match data block contains a vector
|
||||
of offsets into the subject string that define the matched part of the
|
||||
|
@ -1741,11 +1748,20 @@ THE MATCH DATA BLOCK
|
|||
was used for the compiled pattern (custom or default).
|
||||
|
||||
A match data block can be used many times, with the same or different
|
||||
compiled patterns. When it is no longer needed, it should be freed by
|
||||
calling pcre2_match_data_free(). You can extract information from a
|
||||
match data block after a match operation has finished, using functions
|
||||
that are described in the sections on matched strings and other match
|
||||
data below.
|
||||
compiled patterns. You can extract information from a match data block
|
||||
after a match operation has finished, using functions that are
|
||||
described in the sections on matched strings and other match data
|
||||
below.
|
||||
|
||||
When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that
|
||||
they can be referenced by the extraction functions. After running a
|
||||
match, you must not free a compiled pattern or a subject string until
|
||||
after all operations on the match data block (for that match) have
|
||||
taken place.
|
||||
|
||||
When a match data block itself is no longer needed, it should be freed
|
||||
by calling pcre2_match_data_free().
|
||||
|
||||
|
||||
MATCHING A PATTERN: THE TRADITIONAL FUNCTION
|
||||
|
@ -2017,9 +2033,14 @@ HOW PCRE2_MATCH() RETURNS A STRING AND CAPTURED SUBSTRINGS
|
|||
subpatterns, the return value from a successful match is 1, indicating
|
||||
that just the first pair of offsets has been set.
|
||||
|
||||
If a capturing subpattern is matched repeatedly within a single match
|
||||
operation, it is the last portion of the string that it matched that is
|
||||
returned.
|
||||
If a pattern uses the \K escape sequence within a positive assertion,
|
||||
the reported start of the match can be greater than the end of the
|
||||
match. For example, if the pattern (?=ab\K) is matched against "ab",
|
||||
the start and end offset values for the match are 2 and 0.
|
||||
|
||||
If a capturing subpattern group is matched repeatedly within a single
|
||||
match operation, it is the last portion of the subject that it matched
|
||||
that is returned.
|
||||
|
||||
If the ovector is too small to hold all the captured substring offsets,
|
||||
as much as possible is filled in, and the function returns a value of
|
||||
|
@ -2205,24 +2226,33 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
|
|||
described above. For convenience, auxiliary functions are provided for
|
||||
extracting captured substrings as new, separate, zero-terminated
|
||||
strings. The functions in this section identify substrings by number.
|
||||
The next section describes similar functions for extracting substrings
|
||||
by name. A substring that contains a binary zero is correctly extracted
|
||||
and has a further zero added on the end, but the result is not, of
|
||||
course, a C string.
|
||||
The number zero refers to the entire matched substring, with higher
|
||||
numbers referring to substrings captured by parenthesized groups. The
|
||||
next section describes similar functions for extracting captured sub-
|
||||
strings by name. A substring that contains a binary zero is correctly
|
||||
extracted and has a further zero added on the end, but the result is
|
||||
not, of course, a C string.
|
||||
|
||||
If a pattern uses the \K escape sequence within a positive assertion,
|
||||
the reported start of the match can be greater than the end of the
|
||||
match. For example, if the pattern (?=ab\K) is matched against "ab",
|
||||
the start and end offset values for the match are 2 and 0. In this sit-
|
||||
uation, calling these functions with a zero substring number extracts a
|
||||
zero-length empty string.
|
||||
|
||||
You can find the length in code units of a captured substring without
|
||||
extracting it by calling pcre2_substring_length_bynumber(). The first
|
||||
argument is a pointer to the match data block, the second is the group
|
||||
number, and the third is a pointer to a variable into which the length
|
||||
is placed.
|
||||
is placed. If you just want to know whether or not the substring has
|
||||
been captured, you can pass the third argument as NULL.
|
||||
|
||||
The pcre2_substring_copy_bynumber() function copies one string into a
|
||||
supplied buffer, whereas pcre2_substring_get_bynumber() copies it into
|
||||
new memory, obtained using the same memory allocation function that was
|
||||
used for the match data block. The first two arguments of these func-
|
||||
tions are a pointer to the match data block and a capturing group num-
|
||||
ber. A group number of zero extracts the substring that matched the
|
||||
entire pattern, and higher values extract the captured substrings.
|
||||
The pcre2_substring_copy_bynumber() function copies a captured sub-
|
||||
string into a supplied buffer, whereas pcre2_substring_get_bynumber()
|
||||
copies it into new memory, obtained using the same memory allocation
|
||||
function that was used for the match data block. The first two argu-
|
||||
ments of these functions are a pointer to the match data block and a
|
||||
capturing group number.
|
||||
|
||||
The final arguments of pcre2_substring_copy_bynumber() are a pointer to
|
||||
the buffer and a pointer to a variable that contains its length in code
|
||||
|
@ -2235,8 +2265,9 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
|
|||
terminating zero. When the substring is no longer needed, the memory
|
||||
should be freed by calling pcre2_substring_free().
|
||||
|
||||
The return value from these functions is zero for success, or one of
|
||||
these error codes:
|
||||
The return value from all these functions is zero for success, or a
|
||||
negative error code. If the pattern match failed, the match failure
|
||||
code is returned. Other possible error codes are:
|
||||
|
||||
PCRE2_ERROR_NOMEMORY
|
||||
|
||||
|
@ -2245,10 +2276,20 @@ EXTRACTING CAPTURED SUBSTRINGS BY NUMBER
|
|||
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
|
||||
No substring with the given number was captured. This could be because
|
||||
there is no capturing group of that number in the pattern, or because
|
||||
the group with that number did not participate in the match, or because
|
||||
the ovector was too small to capture that group.
|
||||
There is no substring with that number in the pattern, that is, the
|
||||
number is greater than the number of capturing parentheses.
|
||||
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
|
||||
The substring number, though not greater than the number of captures in
|
||||
the pattern, is greater than the number of slots in the ovector, so the
|
||||
substring could not be captured.
|
||||
|
||||
PCRE2_ERROR_UNSET
|
||||
|
||||
The substring did not participate in the match. For example, if the
|
||||
pattern is (abc)|(def) and the subject is "def", and the ovector con-
|
||||
tains at least two capturing slots, substring number 1 is unset.
|
||||
|
||||
|
||||
EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
|
||||
|
@ -2280,7 +2321,8 @@ EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS
|
|||
but subpattern n has not been used at all, it returns an empty string.
|
||||
This can be distinguished from a genuine zero-length substring by
|
||||
inspecting the appropriate offset in the ovector, which contains
|
||||
PCRE2_UNSET for unset substrings.
|
||||
PCRE2_UNSET for unset substrings, or by calling pcre2_sub-
|
||||
string_length_bynumber().
|
||||
|
||||
|
||||
EXTRACTING CAPTURED SUBSTRINGS BY NAME
|
||||
|
@ -2310,14 +2352,21 @@ EXTRACTING CAPTURED SUBSTRINGS BY NAME
|
|||
ment is the compiled pattern, and the second is the name. The yield of
|
||||
the function is the subpattern number, PCRE2_ERROR_NOSUBSTRING if there
|
||||
is no subpattern of that name, or PCRE2_ERROR_NOUNIQUESUBSTRING if
|
||||
there is more than one subpattern of that name.
|
||||
there is more than one subpattern of that name. Given the number, you
|
||||
can extract the substring directly, or use one of the functions
|
||||
described above.
|
||||
|
||||
Given the number, you can extract the substring directly, or use one of
|
||||
the functions described above. For convenience, there are also "byname"
|
||||
functions that correspond to the "bynumber" functions, the only differ-
|
||||
ence being that the second argument is a name instead of a number. How-
|
||||
ever, if PCRE2_DUPNAMES is set and there are duplicate names, the be-
|
||||
haviour may not be what you want.
|
||||
For convenience, there are also "byname" functions that correspond to
|
||||
the "bynumber" functions, the only difference being that the second
|
||||
argument is a name instead of a number. If PCRE2_DUPNAMES is set and
|
||||
there are duplicate names, these functions scan all the groups with the
|
||||
given name, and return the first named string that is set.
|
||||
|
||||
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. If all groups with the name have numbers that are greater
|
||||
than the number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is
|
||||
returned. If there is at least one group with a slot in the ovector,
|
||||
but no group is found to be set, PCRE2_ERROR_UNSET is returned.
|
||||
|
||||
Warning: If the pattern uses the (?| feature to set up multiple subpat-
|
||||
terns with the same number, as described in the section on duplicate
|
||||
|
@ -2404,9 +2453,10 @@ DUPLICATE SUBPATTERN NAMES
|
|||
|
||||
When duplicates are present, pcre2_substring_copy_byname() and
|
||||
pcre2_substring_get_byname() return the first substring corresponding
|
||||
to the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING
|
||||
is returned. The pcre2_substring_number_from_name() function returns
|
||||
the error PCRE2_ERROR_NOUNIQUESUBSTRING.
|
||||
to the given name that is set. Only if none are set is
|
||||
PCRE2_ERROR_UNSET returned. The pcre2_substring_number_from_name()
|
||||
function returns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are
|
||||
duplicate names.
|
||||
|
||||
If you want to get full details of all captured substrings for a given
|
||||
name, you must use the pcre2_substring_nametable_scan() function. The
|
||||
|
@ -2549,17 +2599,37 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION
|
|||
|
||||
the three matched strings are
|
||||
|
||||
<something>
|
||||
<something> <something else>
|
||||
<something> <something else> <something further>
|
||||
<something> <something else>
|
||||
<something>
|
||||
|
||||
On success, the yield of the function is a number greater than zero,
|
||||
which is the number of matched substrings. The offsets of the sub-
|
||||
strings are returned in the ovector, and can be extracted in the same
|
||||
way as for pcre2_match(). They are returned in reverse order of
|
||||
length; that is, the longest matching string is given first. If there
|
||||
were too many matches to fit into the ovector, the yield of the func-
|
||||
tion is zero, and the vector is filled with the longest matches.
|
||||
strings are returned in the ovector, and can be extracted by number in
|
||||
the same way as for pcre2_match(), but the numbers bear no relation to
|
||||
any capturing groups that may exist in the pattern, because DFA match-
|
||||
ing does not support group capture.
|
||||
|
||||
Calls to the convenience functions that extract substrings by name
|
||||
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used
|
||||
after a DFA match. The convenience functions that extract substrings by
|
||||
number never return PCRE2_ERROR_NOSUBSTRING, and the meanings of some
|
||||
other errors are slightly different:
|
||||
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
|
||||
The ovector is not big enough to include a slot for the given substring
|
||||
number.
|
||||
|
||||
PCRE2_ERROR_UNSET
|
||||
|
||||
There is a slot in the ovector for this substring, but there were
|
||||
insufficient matches to fill it.
|
||||
|
||||
The matched strings are stored in the ovector in reverse order of
|
||||
length; that is, the longest matching string is first. If there were
|
||||
too many matches to fit into the ovector, the yield of the function is
|
||||
zero, and the vector is filled with the longest matches.
|
||||
|
||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to
|
||||
character repeats at the end of a pattern (as well as internally). For
|
||||
|
@ -2624,7 +2694,7 @@ AUTHOR
|
|||
|
||||
REVISION
|
||||
|
||||
Last updated: 01 December 2014
|
||||
Last updated: 14 December 2014
|
||||
Copyright (c) 1997-2014 University of Cambridge.
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
|
|
|
@ -201,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.00-RC1"
|
||||
#define PACKAGE_STRING "PCRE2 10.00-RC2"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -210,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.00-RC1"
|
||||
#define PACKAGE_VERSION "10.00-RC2"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -288,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.00-RC1"
|
||||
#define VERSION "10.00-RC2"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 00
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2014-11-28
|
||||
#define PCRE2_PRERELEASE -RC2
|
||||
#define PCRE2_DATE 2014-12-19
|
||||
|
||||
/* When an application links to a PCRE2 DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -80,20 +80,20 @@ uint8_t, UCHAR_MAX, etc are defined. */
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following options can be passed to pcre2_compile(), pcre2_match(), or
|
||||
pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it is
|
||||
passed. Put these bits at the most significant end of the options word so
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them. */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
|
||||
/* Other options that can be passed to pcre2_compile(). They may affect
|
||||
compilation, JIT compilation, and/or interpretive execution. The following tags
|
||||
indicate which:
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled
|
||||
J alters what JIT compiles
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
@ -212,19 +212,21 @@ context functions. */
|
|||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-42)
|
||||
#define PCRE2_ERROR_INTERNAL (-43)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-44)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-46)
|
||||
#define PCRE2_ERROR_NOMEMORY (-47)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-48)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNSET (-53)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -434,16 +436,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
|||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
|
|
Loading…
Reference in New Issue