File tidies for 10.00-RC2.
This commit is contained in:
parent
e34c44e2aa
commit
2a5767d757
|
@ -1,7 +1,7 @@
|
|||
Change Log for PCRE2
|
||||
--------------------
|
||||
|
||||
Version 10.00 28-November-2014
|
||||
Version 10.00 19-December-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
|
@ -14,7 +14,8 @@ logged. In addition to the API changes, the following changes were made. They
|
|||
are either new functionality, or bug fixes and other noticeable changes of
|
||||
behaviour that were implemented after the code had been forked.
|
||||
|
||||
1. Unicode support is now enabled by default.
|
||||
1. Unicode support is now enabled by default, but it can optionally be
|
||||
disabled.
|
||||
|
||||
2. The test program, now called pcre2test, was re-specified and almost
|
||||
completely re-written. Its input is not compatible with input for pcretest.
|
||||
|
|
2
NEWS
2
NEWS
|
@ -1,7 +1,7 @@
|
|||
News about PCRE2 releases
|
||||
-------------------------
|
||||
|
||||
Version 10.00 28-November-2014
|
||||
Version 10.00 19-December-2014
|
||||
------------------------------
|
||||
|
||||
Version 10.00 is the first release of PCRE2, a revised API for the PCRE
|
||||
|
|
|
@ -11,7 +11,7 @@ dnl be defined as -RC2, for example. For real releases, it should be empty.
|
|||
m4_define(pcre2_major, [10])
|
||||
m4_define(pcre2_minor, [00])
|
||||
m4_define(pcre2_prerelease, [-RC2])
|
||||
m4_define(pcre2_date, [2014-11-28])
|
||||
m4_define(pcre2_date, [2014-12-19])
|
||||
|
||||
# NOTE: The CMakeLists.txt file searches for the above variables in the first
|
||||
# 50 lines of this file. Please update that if the variables above are moved.
|
||||
|
|
|
@ -36,8 +36,16 @@ by name, into a given buffer. The arguments are:
|
|||
</pre>
|
||||
The <i>bufflen</i> variable is updated to contain the length of the extracted
|
||||
string, excluding the trailing zero. The yield of the function is zero for
|
||||
success, PCRE2_ERROR_NOMEMORY if the buffer is too small, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
success or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -36,9 +36,15 @@ buffer. The arguments are:
|
|||
<i>bufflen</i> Length of buffer
|
||||
</pre>
|
||||
The <i>bufflen</i> variable is updated with the length of the extracted string,
|
||||
excluding the terminating zero. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if the buffer was too small, or PCRE2_ERROR_NOSUBSTRING if
|
||||
the string number is invalid.
|
||||
excluding the terminating zero. The yield of the function is zero for success
|
||||
or one of the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is too small
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -37,9 +37,17 @@ newly acquired memory. The arguments are:
|
|||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string name is invalid.
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
</pre>
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -37,9 +37,15 @@ into newly acquired memory. The arguments are:
|
|||
The memory in which the substring is placed is obtained by calling the same
|
||||
memory allocation function that was used for the match data block. The
|
||||
convenience function <b>pcre2_substring_free()</b> can be used to free it when
|
||||
it is no longer needed. The yield of the function is zero for success,
|
||||
PCRE2_ERROR_NOMEMORY if sufficient memory could not be obtained, or
|
||||
PCRE2_ERROR_NOSUBSTRING if the string number is invalid.
|
||||
it is no longer needed. The yield of the function is zero for success or one of
|
||||
the following error numbers:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING there are no groups of that number
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
|
||||
</PRE>
|
||||
</P>
|
||||
<P>
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -947,6 +947,14 @@ contains the compiled pattern and related data. The caller must free the memory
|
|||
by calling <b>pcre2_code_free()</b> when it is no longer needed.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: When one of the matching functions is called, pointers to the compiled
|
||||
pattern and the subject string are set in the match data block so that they can
|
||||
be referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern (or a subject string) until after all operations on the
|
||||
<a href="#matchdatablock">match data block</a>
|
||||
have taken place.
|
||||
</P>
|
||||
<P>
|
||||
If the compile context argument <i>ccontext</i> is NULL, memory for the compiled
|
||||
pattern is obtained by calling <b>malloc()</b>. Otherwise, it is obtained from
|
||||
the same memory function that was used for the compile context.
|
||||
|
@ -1690,7 +1698,7 @@ pattern with the JIT compiler does not alter the value returned by this option.
|
|||
<b>void pcre2_match_data_free(pcre2_match_data *<i>match_data</i>);</b>
|
||||
</P>
|
||||
<P>
|
||||
Information about successful and unsuccessful matches is placed in a match
|
||||
Information about a successful or unsuccessful match is placed in a match
|
||||
data block, which is an opaque structure that is accessed by function calls. In
|
||||
particular, the match data block contains a vector of offsets into the subject
|
||||
string that define the matched part of the subject and any substrings that were
|
||||
|
@ -1724,15 +1732,24 @@ pattern (custom or default).
|
|||
</P>
|
||||
<P>
|
||||
A match data block can be used many times, with the same or different compiled
|
||||
patterns. When it is no longer needed, it should be freed by calling
|
||||
<b>pcre2_match_data_free()</b>. You can extract information from a match data
|
||||
block after a match operation has finished, using functions that are described
|
||||
in the sections on
|
||||
patterns. You can extract information from a match data block after a match
|
||||
operation has finished, using functions that are described in the sections on
|
||||
<a href="#matchedstrings">matched strings</a>
|
||||
and
|
||||
<a href="#matchotherdata">other match data</a>
|
||||
below.
|
||||
</P>
|
||||
<P>
|
||||
When one of the matching functions is called, pointers to the compiled pattern
|
||||
and the subject string are set in the match data block so that they can be
|
||||
referenced by the extraction functions. After running a match, you must not
|
||||
free a compiled pattern or a subject string until after all operations on the
|
||||
match data block (for that match) have taken place.
|
||||
</P>
|
||||
<P>
|
||||
When a match data block itself is no longer needed, it should be freed by
|
||||
calling <b>pcre2_match_data_free()</b>.
|
||||
</P>
|
||||
<br><a name="SEC23" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
|
||||
<P>
|
||||
<b>int pcre2_match(const pcre2_code *<i>code</i>, PCRE2_SPTR <i>subject</i>,</b>
|
||||
|
@ -2034,8 +2051,14 @@ from a successful match is 1, indicating that just the first pair of offsets
|
|||
has been set.
|
||||
</P>
|
||||
<P>
|
||||
If a capturing subpattern is matched repeatedly within a single match
|
||||
operation, it is the last portion of the string that it matched that is
|
||||
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0.
|
||||
</P>
|
||||
<P>
|
||||
If a capturing subpattern group is matched repeatedly within a single match
|
||||
operation, it is the last portion of the subject that it matched that is
|
||||
returned.
|
||||
</P>
|
||||
<P>
|
||||
|
@ -2234,25 +2257,34 @@ Captured substrings can be accessed directly by using the ovector as described
|
|||
<a href="#matchedstrings">above.</a>
|
||||
For convenience, auxiliary functions are provided for extracting captured
|
||||
substrings as new, separate, zero-terminated strings. The functions in this
|
||||
section identify substrings by number. The next section describes similar
|
||||
functions for extracting substrings by name. A substring that contains a binary
|
||||
zero is correctly extracted and has a further zero added on the end, but the
|
||||
result is not, of course, a C string.
|
||||
section identify substrings by number. The number zero refers to the entire
|
||||
matched substring, with higher numbers referring to substrings captured by
|
||||
parenthesized groups. The next section describes similar functions for
|
||||
extracting captured substrings by name. A substring that contains a binary zero
|
||||
is correctly extracted and has a further zero added on the end, but the result
|
||||
is not, of course, a C string.
|
||||
</P>
|
||||
<P>
|
||||
If a pattern uses the \K escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\K) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0. In this situation, calling these
|
||||
functions with a zero substring number extracts a zero-length empty string.
|
||||
</P>
|
||||
<P>
|
||||
You can find the length in code units of a captured substring without
|
||||
extracting it by calling <b>pcre2_substring_length_bynumber()</b>. The first
|
||||
argument is a pointer to the match data block, the second is the group number,
|
||||
and the third is a pointer to a variable into which the length is placed.
|
||||
and the third is a pointer to a variable into which the length is placed. If
|
||||
you just want to know whether or not the substring has been captured, you can
|
||||
pass the third argument as NULL.
|
||||
</P>
|
||||
<P>
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies one string into a
|
||||
supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it into
|
||||
new memory, obtained using the same memory allocation function that was used
|
||||
for the match data block. The first two arguments of these functions are a
|
||||
pointer to the match data block and a capturing group number. A group number of
|
||||
zero extracts the substring that matched the entire pattern, and higher values
|
||||
extract the captured substrings.
|
||||
The <b>pcre2_substring_copy_bynumber()</b> function copies a captured substring
|
||||
into a supplied buffer, whereas <b>pcre2_substring_get_bynumber()</b> copies it
|
||||
into new memory, obtained using the same memory allocation function that was
|
||||
used for the match data block. The first two arguments of these functions are a
|
||||
pointer to the match data block and a capturing group number.
|
||||
</P>
|
||||
<P>
|
||||
The final arguments of <b>pcre2_substring_copy_bynumber()</b> are a pointer to
|
||||
|
@ -2268,8 +2300,9 @@ zero. When the substring is no longer needed, the memory should be freed by
|
|||
calling <b>pcre2_substring_free()</b>.
|
||||
</P>
|
||||
<P>
|
||||
The return value from these functions is zero for success, or one of these
|
||||
error codes:
|
||||
The return value from all these functions is zero for success, or a negative
|
||||
error code. If the pattern match failed, the match failure code is returned.
|
||||
Other possible error codes are:
|
||||
<pre>
|
||||
PCRE2_ERROR_NOMEMORY
|
||||
</pre>
|
||||
|
@ -2278,10 +2311,20 @@ attempt to get memory failed for <b>pcre2_substring_get_bynumber()</b>.
|
|||
<pre>
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
</pre>
|
||||
No substring with the given number was captured. This could be because there is
|
||||
no capturing group of that number in the pattern, or because the group with
|
||||
that number did not participate in the match, or because the ovector was too
|
||||
small to capture that group.
|
||||
There is no substring with that number in the pattern, that is, the number is
|
||||
greater than the number of capturing parentheses.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
</pre>
|
||||
The substring number, though not greater than the number of captures in the
|
||||
pattern, is greater than the number of slots in the ovector, so the substring
|
||||
could not be captured.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNSET
|
||||
</pre>
|
||||
The substring did not participate in the match. For example, if the pattern is
|
||||
(abc)|(def) and the subject is "def", and the ovector contains at least two
|
||||
capturing slots, substring number 1 is unset.
|
||||
</P>
|
||||
<br><a name="SEC29" href="#TOC1">EXTRACTING A LIST OF ALL CAPTURED SUBSTRINGS</a><br>
|
||||
<P>
|
||||
|
@ -2316,7 +2359,7 @@ capturing subpattern number <i>n+1</i> matches some part of the subject, but
|
|||
subpattern <i>n</i> has not been used at all, it returns an empty string. This
|
||||
can be distinguished from a genuine zero-length substring by inspecting the
|
||||
appropriate offset in the ovector, which contains PCRE2_UNSET for unset
|
||||
substrings.
|
||||
substrings, or by calling <b>pcre2_substring_length_bynumber()</b>.
|
||||
<a name="extractbyname"></a></P>
|
||||
<br><a name="SEC30" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
|
||||
<P>
|
||||
|
@ -2350,14 +2393,22 @@ calling <b>pcre2_substring_number_from_name()</b>. The first argument is the
|
|||
compiled pattern, and the second is the name. The yield of the function is the
|
||||
subpattern number, PCRE2_ERROR_NOSUBSTRING if there is no subpattern of that
|
||||
name, or PCRE2_ERROR_NOUNIQUESUBSTRING if there is more than one subpattern of
|
||||
that name.
|
||||
that name. Given the number, you can extract the substring directly, or use one
|
||||
of the functions described above.
|
||||
</P>
|
||||
<P>
|
||||
Given the number, you can extract the substring directly, or use one of the
|
||||
functions described above. For convenience, there are also "byname" functions
|
||||
that correspond to the "bynumber" functions, the only difference being that the
|
||||
second argument is a name instead of a number. However, if PCRE2_DUPNAMES is
|
||||
set and there are duplicate names, the behaviour may not be what you want.
|
||||
For convenience, there are also "byname" functions that correspond to the
|
||||
"bynumber" functions, the only difference being that the second argument is a
|
||||
name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
|
||||
names, these functions scan all the groups with the given name, and return the
|
||||
first named string that is set.
|
||||
</P>
|
||||
<P>
|
||||
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. If all groups with the name have numbers that are greater than the
|
||||
number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there
|
||||
is at least one group with a slot in the ovector, but no group is found to be
|
||||
set, PCRE2_ERROR_UNSET is returned.
|
||||
</P>
|
||||
<P>
|
||||
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
|
||||
|
@ -2451,9 +2502,9 @@ documentation.
|
|||
<P>
|
||||
When duplicates are present, <b>pcre2_substring_copy_byname()</b> and
|
||||
<b>pcre2_substring_get_byname()</b> return the first substring corresponding to
|
||||
the given name that is set. If none are set, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. The <b>pcre2_substring_number_from_name()</b> function returns
|
||||
the error PCRE2_ERROR_NOUNIQUESUBSTRING.
|
||||
the given name that is set. Only if none are set is PCRE2_ERROR_UNSET
|
||||
returned. The <b>pcre2_substring_number_from_name()</b> function returns the
|
||||
error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names.
|
||||
</P>
|
||||
<P>
|
||||
If you want to get full details of all captured substrings for a given name,
|
||||
|
@ -2607,17 +2658,38 @@ is matched against the string
|
|||
</pre>
|
||||
the three matched strings are
|
||||
<pre>
|
||||
<something>
|
||||
<something> <something else>
|
||||
<something> <something else> <something further>
|
||||
<something> <something else>
|
||||
<something>
|
||||
</pre>
|
||||
On success, the yield of the function is a number greater than zero, which is
|
||||
the number of matched substrings. The offsets of the substrings are returned in
|
||||
the ovector, and can be extracted in the same way as for <b>pcre2_match()</b>.
|
||||
They are returned in reverse order of length; that is, the longest
|
||||
matching string is given first. If there were too many matches to fit into
|
||||
the ovector, the yield of the function is zero, and the vector is filled with
|
||||
the longest matches.
|
||||
the ovector, and can be extracted by number in the same way as for
|
||||
<b>pcre2_match()</b>, but the numbers bear no relation to any capturing groups
|
||||
that may exist in the pattern, because DFA matching does not support group
|
||||
capture.
|
||||
</P>
|
||||
<P>
|
||||
Calls to the convenience functions that extract substrings by name
|
||||
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
|
||||
DFA match. The convenience functions that extract substrings by number never
|
||||
return PCRE2_ERROR_NOSUBSTRING, and the meanings of some other errors are
|
||||
slightly different:
|
||||
<pre>
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
</pre>
|
||||
The ovector is not big enough to include a slot for the given substring number.
|
||||
<pre>
|
||||
PCRE2_ERROR_UNSET
|
||||
</pre>
|
||||
There is a slot in the ovector for this substring, but there were insufficient
|
||||
matches to fill it.
|
||||
</P>
|
||||
<P>
|
||||
The matched strings are stored in the ovector in reverse order of length; that
|
||||
is, the longest matching string is first. If there were too many matches to fit
|
||||
into the ovector, the yield of the function is zero, and the vector is filled
|
||||
with the longest matches.
|
||||
</P>
|
||||
<P>
|
||||
NOTE: PCRE2's "auto-possessification" optimization usually applies to character
|
||||
|
@ -2685,7 +2757,7 @@ Cambridge, England.
|
|||
</P>
|
||||
<br><a name="SEC37" href="#TOC1">REVISION</a><br>
|
||||
<P>
|
||||
Last updated: 01 December 2014
|
||||
Last updated: 14 December 2014
|
||||
<br>
|
||||
Copyright © 1997-2014 University of Cambridge.
|
||||
<br>
|
||||
|
|
958
doc/pcre2.txt
958
doc/pcre2.txt
File diff suppressed because it is too large
Load Diff
|
@ -29,10 +29,10 @@ success or one of the following error numbers:
|
|||
PCRE2_ERROR_NOSUBSTRING there are no groups of that name
|
||||
PCRE2_ERROR_UNAVAILABLE the ovector was too small for that group
|
||||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||
PCRE2_ERROR_NOMEMORY the buffer is not big enough
|
||||
.sp
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -33,8 +33,8 @@ the following error numbers:
|
|||
PCRE2_ERROR_UNSET the group did not participate in the match
|
||||
PCRE2_ERROR_NOMEMORY memory could not be obtained
|
||||
.sp
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
If there is more than one group with the given name, the first one that is set
|
||||
is returned. In this situation PCRE2_ERROR_UNSET means that no group with the
|
||||
given name was set.
|
||||
.P
|
||||
There is a complete description of the PCRE2 native API in the
|
||||
|
|
|
@ -927,7 +927,7 @@ be referenced by the extraction functions. After running a match, you must not
|
|||
free a compiled pattern (or a subject string) until after all operations on the
|
||||
.\" HTML <a href="#matchdatablock">
|
||||
.\" </a>
|
||||
match data block
|
||||
match data block
|
||||
.\"
|
||||
have taken place.
|
||||
.P
|
||||
|
@ -2070,9 +2070,9 @@ returned value is 3. If there are no capturing subpatterns, the return value
|
|||
from a successful match is 1, indicating that just the first pair of offsets
|
||||
has been set.
|
||||
.P
|
||||
If a pattern uses the \eK escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\eK) is matched against "ab", the start and end
|
||||
If a pattern uses the \eK escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\eK) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0.
|
||||
.P
|
||||
If a capturing subpattern group is matched repeatedly within a single match
|
||||
|
@ -2297,17 +2297,17 @@ extracting captured substrings by name. A substring that contains a binary zero
|
|||
is correctly extracted and has a further zero added on the end, but the result
|
||||
is not, of course, a C string.
|
||||
.P
|
||||
If a pattern uses the \eK escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\eK) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0. In this situation, calling these
|
||||
If a pattern uses the \eK escape sequence within a positive assertion, the
|
||||
reported start of the match can be greater than the end of the match. For
|
||||
example, if the pattern (?=ab\eK) is matched against "ab", the start and end
|
||||
offset values for the match are 2 and 0. In this situation, calling these
|
||||
functions with a zero substring number extracts a zero-length empty string.
|
||||
.P
|
||||
You can find the length in code units of a captured substring without
|
||||
extracting it by calling \fBpcre2_substring_length_bynumber()\fP. The first
|
||||
argument is a pointer to the match data block, the second is the group number,
|
||||
and the third is a pointer to a variable into which the length is placed. If
|
||||
you just want to know whether or not the substring has been captured, you can
|
||||
and the third is a pointer to a variable into which the length is placed. If
|
||||
you just want to know whether or not the substring has been captured, you can
|
||||
pass the third argument as NULL.
|
||||
.P
|
||||
The \fBpcre2_substring_copy_bynumber()\fP function copies a captured substring
|
||||
|
@ -2338,13 +2338,13 @@ attempt to get memory failed for \fBpcre2_substring_get_bynumber()\fP.
|
|||
.sp
|
||||
PCRE2_ERROR_NOSUBSTRING
|
||||
.sp
|
||||
There is no substring with that number in the pattern, that is, the number is
|
||||
There is no substring with that number in the pattern, that is, the number is
|
||||
greater than the number of capturing parentheses.
|
||||
.sp
|
||||
PCRE2_ERROR_UNAVAILABLE
|
||||
.sp
|
||||
The substring number, though not greater than the number of captures in the
|
||||
pattern, is greater than the number of slots in the ovector, so the substring
|
||||
The substring number, though not greater than the number of captures in the
|
||||
pattern, is greater than the number of slots in the ovector, so the substring
|
||||
could not be captured.
|
||||
.sp
|
||||
PCRE2_ERROR_UNSET
|
||||
|
@ -2429,10 +2429,10 @@ name instead of a number. If PCRE2_DUPNAMES is set and there are duplicate
|
|||
names, these functions scan all the groups with the given name, and return the
|
||||
first named string that is set.
|
||||
.P
|
||||
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. If all groups with the name have numbers that are greater than the
|
||||
number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there
|
||||
is at least one group with a slot in the ovector, but no group is found to be
|
||||
If there are no groups with the given name, PCRE2_ERROR_NOSUBSTRING is
|
||||
returned. If all groups with the name have numbers that are greater than the
|
||||
number of slots in the ovector, PCRE2_ERROR_UNAVAILABLE is returned. If there
|
||||
is at least one group with a slot in the ovector, but no group is found to be
|
||||
set, PCRE2_ERROR_UNSET is returned.
|
||||
.P
|
||||
\fBWarning:\fP If the pattern uses the (?| feature to set up multiple
|
||||
|
@ -2706,7 +2706,7 @@ the number of matched substrings. The offsets of the substrings are returned in
|
|||
the ovector, and can be extracted by number in the same way as for
|
||||
\fBpcre2_match()\fP, but the numbers bear no relation to any capturing groups
|
||||
that may exist in the pattern, because DFA matching does not support group
|
||||
capture.
|
||||
capture.
|
||||
.P
|
||||
Calls to the convenience functions that extract substrings by name
|
||||
return the error PCRE2_ERROR_DFA_UFUNC (unsupported function) if used after a
|
||||
|
@ -2720,7 +2720,7 @@ The ovector is not big enough to include a slot for the given substring number.
|
|||
.sp
|
||||
PCRE2_ERROR_UNSET
|
||||
.sp
|
||||
There is a slot in the ovector for this substring, but there were insufficient
|
||||
There is a slot in the ovector for this substring, but there were insufficient
|
||||
matches to fill it.
|
||||
.P
|
||||
The matched strings are stored in the ovector in reverse order of length; that
|
||||
|
|
|
@ -201,7 +201,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_NAME "PCRE2"
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#define PACKAGE_STRING "PCRE2 10.00-RC1"
|
||||
#define PACKAGE_STRING "PCRE2 10.00-RC2"
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#define PACKAGE_TARNAME "pcre2"
|
||||
|
@ -210,7 +210,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
#define PACKAGE_URL ""
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#define PACKAGE_VERSION "10.00-RC1"
|
||||
#define PACKAGE_VERSION "10.00-RC2"
|
||||
|
||||
/* The value of PARENS_NEST_LIMIT specifies the maximum depth of nested
|
||||
parentheses (of any kind) in a pattern. This limits the amount of system
|
||||
|
@ -288,7 +288,7 @@ sure both macros are undefined; an emulation function will then be used. */
|
|||
/* #undef SUPPORT_VALGRIND */
|
||||
|
||||
/* Version number of package */
|
||||
#define VERSION "10.00-RC1"
|
||||
#define VERSION "10.00-RC2"
|
||||
|
||||
/* Define to empty if `const' does not conform to ANSI C. */
|
||||
/* #undef const */
|
||||
|
|
|
@ -43,8 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define PCRE2_MAJOR 10
|
||||
#define PCRE2_MINOR 00
|
||||
#define PCRE2_PRERELEASE -RC1
|
||||
#define PCRE2_DATE 2014-11-28
|
||||
#define PCRE2_PRERELEASE -RC2
|
||||
#define PCRE2_DATE 2014-12-19
|
||||
|
||||
/* When an application links to a PCRE DLL in Windows, the symbols that are
|
||||
imported have to be identified as such. When building PCRE2, the appropriate
|
||||
|
@ -80,20 +80,20 @@ uint8_t, UCHAR_MAX, etc are defined. */
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* The following options can be passed to pcre2_compile(), pcre2_match(), or
|
||||
pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it is
|
||||
passed. Put these bits at the most significant end of the options word so
|
||||
/* The following option bits can be passed to pcre2_compile(), pcre2_match(),
|
||||
or pcre2_dfa_match(). PCRE2_NO_UTF_CHECK affects only the function to which it
|
||||
is passed. Put these bits at the most significant end of the options word so
|
||||
others can be added next to them */
|
||||
|
||||
#define PCRE2_ANCHORED 0x80000000u
|
||||
#define PCRE2_NO_UTF_CHECK 0x40000000u
|
||||
|
||||
/* Other options that can be passed to pcre2_compile(). They may affect
|
||||
compilation, JIT compilation, and/or interpretive execution. The following tags
|
||||
indicate which:
|
||||
/* The following option bits can be passed only to pcre2_compile(). However,
|
||||
they may affect compilation, JIT compilation, and/or interpretive execution.
|
||||
The following tags indicate which:
|
||||
|
||||
C alters what is compiled
|
||||
J alters what JIT compiles
|
||||
C alters what is compiled by pcre2_compile()
|
||||
J alters what is compiled by pcre2_jit_compile()
|
||||
M is inspected during pcre2_match() execution
|
||||
D is inspected during pcre2_dfa_match() execution
|
||||
*/
|
||||
|
@ -212,19 +212,21 @@ context functions. */
|
|||
#define PCRE2_ERROR_DFA_BADRESTART (-38)
|
||||
#define PCRE2_ERROR_DFA_RECURSE (-39)
|
||||
#define PCRE2_ERROR_DFA_UCOND (-40)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-41)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-42)
|
||||
#define PCRE2_ERROR_INTERNAL (-43)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-44)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-45)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-46)
|
||||
#define PCRE2_ERROR_NOMEMORY (-47)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-48)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NULL (-50)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-51)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-52)
|
||||
#define PCRE2_ERROR_UNSET (-53)
|
||||
#define PCRE2_ERROR_DFA_UFUNC (-41)
|
||||
#define PCRE2_ERROR_DFA_UITEM (-42)
|
||||
#define PCRE2_ERROR_DFA_WSSIZE (-43)
|
||||
#define PCRE2_ERROR_INTERNAL (-44)
|
||||
#define PCRE2_ERROR_JIT_BADOPTION (-45)
|
||||
#define PCRE2_ERROR_JIT_STACKLIMIT (-46)
|
||||
#define PCRE2_ERROR_MATCHLIMIT (-47)
|
||||
#define PCRE2_ERROR_NOMEMORY (-48)
|
||||
#define PCRE2_ERROR_NOSUBSTRING (-49)
|
||||
#define PCRE2_ERROR_NOUNIQUESUBSTRING (-50)
|
||||
#define PCRE2_ERROR_NULL (-51)
|
||||
#define PCRE2_ERROR_RECURSELOOP (-52)
|
||||
#define PCRE2_ERROR_RECURSIONLIMIT (-53)
|
||||
#define PCRE2_ERROR_UNAVAILABLE (-54)
|
||||
#define PCRE2_ERROR_UNSET (-55)
|
||||
|
||||
/* Request types for pcre2_pattern_info() */
|
||||
|
||||
|
@ -434,16 +436,16 @@ PCRE2_EXP_DECL PCRE2_SIZE pcre2_get_startchar(pcre2_match_data *);
|
|||
PCRE2_EXP_DECL int pcre2_substring_copy_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_copy_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_UCHAR *, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL void pcre2_substring_free(PCRE2_UCHAR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_get_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_UCHAR **, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_byname(pcre2_match_data *, \
|
||||
PCRE2_SPTR, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_length_bynumber(pcre2_match_data *, \
|
||||
unsigned int, PCRE2_SIZE *); \
|
||||
uint32_t, PCRE2_SIZE *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_nametable_scan(const pcre2_code *, \
|
||||
PCRE2_SPTR, PCRE2_SPTR *, PCRE2_SPTR *); \
|
||||
PCRE2_EXP_DECL int pcre2_substring_number_from_name(\
|
||||
|
|
|
@ -228,7 +228,7 @@ static const char match_error_texts[] =
|
|||
"NULL argument passed\0"
|
||||
"nested recursion at the same subject position\0"
|
||||
"recursion limit exceeded\0"
|
||||
"requested value is not available\0"
|
||||
"requested value is not available\0"
|
||||
"requested value is not set\0"
|
||||
;
|
||||
|
||||
|
|
|
@ -530,7 +530,7 @@ bytes in a code unit in that mode. */
|
|||
|
||||
enum { PCRE2_MATCHEDBY_INTERPRETER, /* pcre2_match() */
|
||||
PCRE2_MATCHEDBY_DFA_INTERPRETER, /* pcre2_dfa_match() */
|
||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||
PCRE2_MATCHEDBY_JIT }; /* pcre2_jit_match() */
|
||||
|
||||
/* Magic number to provide a small check against being handed junk. */
|
||||
|
||||
|
|
|
@ -620,7 +620,7 @@ typedef struct pcre2_real_match_data {
|
|||
PCRE2_SIZE leftchar; /* Offset to leftmost code unit */
|
||||
PCRE2_SIZE rightchar; /* Offset to rightmost code unit */
|
||||
PCRE2_SIZE startchar; /* Offset to starting code unit */
|
||||
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint16_t matchedby; /* Type of match (normal, JIT, DFA) */
|
||||
uint16_t oveccount; /* Number of pairs */
|
||||
int rc; /* The return code from the match */
|
||||
PCRE2_SIZE ovector[1]; /* The first field */
|
||||
|
|
|
@ -65,7 +65,7 @@ Returns: if successful: zero
|
|||
if not successful, a negative error code:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from copy_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
|
@ -88,8 +88,8 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
{
|
||||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
@ -114,7 +114,7 @@ Returns: if successful: 0
|
|||
PCRE2_ERROR_NOMEMORY: buffer too small
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -126,7 +126,7 @@ PCRE2_SIZE size;
|
|||
rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
|
||||
if (rc < 0) return rc;
|
||||
if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
buffer[size] = 0;
|
||||
*sizeptr = size;
|
||||
|
@ -152,8 +152,8 @@ Arguments:
|
|||
Returns: if successful: zero
|
||||
if not successful, a negative value:
|
||||
(1) an error from nametable_scan()
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(2) an error from get_bynumber()
|
||||
(3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
|
||||
(4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
|
||||
*/
|
||||
|
||||
|
@ -177,7 +177,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
@ -202,7 +202,7 @@ Returns: if successful: 0
|
|||
PCRE2_ERROR_NOMEMORY: failed to get memory
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -218,7 +218,7 @@ yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
|
|||
(size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
|
||||
if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
|
||||
CU2BYTES(size));
|
||||
yield[size] = 0;
|
||||
*stringptr = yield;
|
||||
|
@ -281,7 +281,7 @@ for (entry = first; entry <= last; entry += entrysize)
|
|||
if (match_data->ovector[n*2] != PCRE2_UNSET)
|
||||
return pcre2_substring_length_bynumber(match_data, n, sizeptr);
|
||||
failrc = PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
}
|
||||
return failrc;
|
||||
}
|
||||
|
@ -292,8 +292,8 @@ return failrc;
|
|||
* Get length of a numbered substring *
|
||||
*************************************************/
|
||||
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
/* This function returns the length of a captured substring. If the start is
|
||||
beyond the end (which can happen when \K is used in an assertion), it sets the
|
||||
length to zero.
|
||||
|
||||
Arguments:
|
||||
|
@ -305,7 +305,7 @@ Returns: if successful: 0
|
|||
if not successful, a negative error code:
|
||||
PCRE2_ERROR_NOSUBSTRING: no such substring
|
||||
PCRE2_ERROR_UNAVAILABLE: ovector is too small
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
PCRE2_ERROR_UNSET: substring is not set
|
||||
*/
|
||||
|
||||
PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
|
||||
|
@ -317,9 +317,9 @@ PCRE2_SIZE left, right;
|
|||
if ((count = match_data->rc) < 0) return count; /* Match failed */
|
||||
if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
|
||||
{
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
if (stringnumber > match_data->code->top_bracket)
|
||||
return PCRE2_ERROR_NOSUBSTRING;
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
if (stringnumber >= match_data->oveccount)
|
||||
return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
|
||||
return PCRE2_ERROR_UNSET;
|
||||
|
@ -328,11 +328,11 @@ else /* Matched using pcre2_dfa_match() */
|
|||
{
|
||||
if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
|
||||
if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
|
||||
}
|
||||
}
|
||||
left = match_data->ovector[stringnumber*2];
|
||||
right = match_data->ovector[stringnumber*2+1];
|
||||
if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -382,8 +382,8 @@ for (i = 0; i < count2; i += 2)
|
|||
{
|
||||
size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
|
||||
if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
|
||||
if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
|
||||
|
@ -489,7 +489,7 @@ while (top > bot)
|
|||
if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
|
||||
last += entrysize;
|
||||
}
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
if (firstptr == NULL) return (first == last)?
|
||||
(int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
|
||||
*firstptr = first;
|
||||
*lastptr = last;
|
||||
|
|
|
@ -4142,7 +4142,7 @@ if (callout_capture)
|
|||
for (i = 0; i < cb->capture_top * 2; i += 2)
|
||||
{
|
||||
fprintf(f, "%2d: ", i/2);
|
||||
if (cb->offset_vector[i] == PCRE2_UNSET)
|
||||
if (cb->offset_vector[i] == PCRE2_UNSET)
|
||||
fprintf(f, "<unset>");
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue